mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-31 20:34:54 +08:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			v2.6.0-rc2
			...
			dev/joona/
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| e931e664b0 | |||
| 0f942aab1f | |||
| 502d10d441 | |||
| eb5a0e1f93 | 
| @ -1 +1 @@ | ||||
| 6.5.0 | ||||
| 6.1.1 | ||||
|  | ||||
							
								
								
									
										26
									
								
								.buckconfig.oss
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								.buckconfig.oss
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,26 @@ | ||||
| [pt] | ||||
|   is_oss=1 | ||||
|  | ||||
| [buildfile] | ||||
|   name = BUCK.oss | ||||
|   includes = //tools/build_defs/select.bzl | ||||
|  | ||||
| [repositories] | ||||
|   bazel_skylib = third_party/bazel-skylib/ | ||||
|   ovr_config = . | ||||
|  | ||||
| [download] | ||||
|   in_build = true | ||||
|  | ||||
| [cxx] | ||||
|   cxxflags = -std=c++17 | ||||
|   ldflags = -Wl,--no-undefined | ||||
|   should_remap_host_platform = true | ||||
|   cpp = /usr/bin/clang | ||||
|   cc = /usr/bin/clang | ||||
|   cxx = /usr/bin/clang++ | ||||
|   cxxpp = /usr/bin/clang++ | ||||
|   ld = /usr/bin/clang++ | ||||
|  | ||||
| [project] | ||||
|   default_flavors_mode=all | ||||
| @ -1,19 +0,0 @@ | ||||
| # Aarch64 (ARM/Graviton) Support Scripts | ||||
| Scripts for building aarch64 PyTorch PIP Wheels. These scripts build the following wheels: | ||||
| * torch | ||||
| * torchvision | ||||
| * torchaudio | ||||
| * torchtext | ||||
| * torchdata | ||||
| ## Aarch64_ci_build.sh | ||||
| This script is designed to support CD operations within the PyPi manylinux aarch64 container, and to be executed inside that container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script "assumes" the PyTorch repo is located at: ```/pytorch``` and will put the wheels into ```/artifacts```. | ||||
| ### Usage | ||||
| ```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh``` | ||||
|  | ||||
| __NOTE:__ CI build is currently __EXPERIMENTAL__ | ||||
|  | ||||
| ## Build_aarch64_wheel.py | ||||
| This app allows a person to build using AWS EC2 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. Can be used in a codebuild CD or from a local system. | ||||
|  | ||||
| ### Usage | ||||
| ```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>``` | ||||
| @ -1,39 +0,0 @@ | ||||
| #!/bin/bash | ||||
| # Build driver for the aarch64 PyTorch wheel: sources the container setup | ||||
| # script, derives OVERRIDE_PACKAGE_VERSION from an exact git tag (if any), | ||||
| # installs Python requirements and runs aarch64_wheel_ci_build.py for either | ||||
| # a CPU or a CUDA build. | ||||
| set -eux -o pipefail | ||||
|  | ||||
| # Default to empty when unset, since the script runs with `set -u`. | ||||
| GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} | ||||
|  | ||||
| # Resolve the directory containing this script so the setup script can be | ||||
| # sourced regardless of the caller's working directory. | ||||
| SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" | ||||
| source $SCRIPTPATH/aarch64_ci_setup.sh | ||||
|  | ||||
| # Print the tag describing HEAD of /pytorch when HEAD sits exactly on a tag | ||||
| # matching v<N>.<N>.<N>*; return 1 otherwise. | ||||
| tagged_version() { | ||||
|   GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*" | ||||
|   if ${GIT_DESCRIBE} --exact >/dev/null; then | ||||
|     ${GIT_DESCRIBE} | ||||
|   else | ||||
|     return 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # On a tagged commit, export the tag with its leading "v" and everything from | ||||
| # the first "-" onwards stripped (e.g. v2.6.0-rc2 -> 2.6.0). | ||||
| if tagged_version >/dev/null; then | ||||
|   export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')" | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Run aarch64 builder python | ||||
| ############################################################################### | ||||
| cd / | ||||
| # adding safe directory for git as the permissions will be | ||||
| # on the mounted pytorch repo | ||||
| git config --global --add safe.directory /pytorch | ||||
| pip install -r /pytorch/requirements.txt | ||||
| pip install auditwheel | ||||
| # NOTE: DESIRED_CUDA has no default in this script, so under `set -u` it must | ||||
| # be provided by the caller (or by the sourced setup script). The messages | ||||
| # below call it BASE_CUDA_VERSION, but the variable tested is DESIRED_CUDA. | ||||
| if [ "$DESIRED_CUDA" = "cpu" ]; then | ||||
|     echo "BASE_CUDA_VERSION is not set. Building cpu wheel." | ||||
|     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files | ||||
|     USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn | ||||
| else | ||||
|     echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA" | ||||
|     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files | ||||
|     USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda | ||||
| fi | ||||
| @ -1,23 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -eux -o pipefail | ||||
|  | ||||
| # This script is used to prepare the Docker container for the | ||||
| # aarch64_wheel_ci_build.py python script by installing pinned build tools | ||||
| # and creating symlinks from the desired /opt/python to /usr/local/bin/ | ||||
|  | ||||
| # Default pins; Python 3.13 gets newer numpy/pygit2 versions below. | ||||
| NUMPY_VERSION=2.0.2 | ||||
| PYGIT2_VERSION=1.15.1 | ||||
| if [[ "$DESIRED_PYTHON"  == "3.13" ]]; then | ||||
|     NUMPY_VERSION=2.1.2 | ||||
|     PYGIT2_VERSION=1.16.0 | ||||
| fi | ||||
|  | ||||
| # Resolve this script's directory, then source the shared helper. | ||||
| # NOTE(review): DESIRED_PYTHON_BIN_DIR used below is presumably set by | ||||
| # set_desired_python.sh -- confirm against that script. | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
| source $SCRIPTPATH/../manywheel/set_desired_python.sh | ||||
|  | ||||
| pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION} | ||||
|  | ||||
| # Expose the selected interpreter and the freshly installed tools on PATH | ||||
| # via /usr/local/bin (existing links are replaced because of -f). | ||||
| for tool in python python3 pip pip3 ninja scons patchelf; do | ||||
|     ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin; | ||||
| done | ||||
|  | ||||
| # Log the interpreter version that the symlink resolves to. | ||||
| python --version | ||||
| @ -1,230 +0,0 @@ | ||||
| #!/usr/bin/env python3 | ||||
| # encoding: UTF-8 | ||||
|  | ||||
| import os | ||||
| import shutil | ||||
| from subprocess import check_call, check_output | ||||
| from typing import List | ||||
|  | ||||
| from pygit2 import Repository | ||||
|  | ||||
|  | ||||
| def list_dir(path: str) -> List[str]: | ||||
|     """' | ||||
|     Helper for getting paths for Python | ||||
|     """ | ||||
|     return check_output(["ls", "-1", path]).decode().split("\n") | ||||
|  | ||||
|  | ||||
| def build_ArmComputeLibrary() -> None: | ||||
|     """ | ||||
|     Clone and build Arm Compute Library (ACL) for the aarch64 PyTorch build. | ||||
|  | ||||
|     Clones tag v24.09 into ./ComputeLibrary (relative to the current working | ||||
|     directory), builds it with scons into /acl/build, then copies selected | ||||
|     source/header directories into /acl. | ||||
|  | ||||
|     Raises FileExistsError if /acl already exists (os.makedirs is called | ||||
|     without exist_ok) and subprocess.CalledProcessError if git or scons fail. | ||||
|     """ | ||||
|     print("Building Arm Compute Library") | ||||
|     # scons key=value options passed to the ACL build. | ||||
|     acl_build_flags = [ | ||||
|         "debug=0", | ||||
|         "neon=1", | ||||
|         "opencl=0", | ||||
|         "os=linux", | ||||
|         "openmp=1", | ||||
|         "cppthreads=0", | ||||
|         "arch=armv8a", | ||||
|         "multi_isa=1", | ||||
|         "fixed_format_kernels=1", | ||||
|         "build=native", | ||||
|     ] | ||||
|     acl_install_dir = "/acl" | ||||
|     acl_checkout_dir = "ComputeLibrary" | ||||
|     os.makedirs(acl_install_dir) | ||||
|     # Shallow clone of the pinned release tag. | ||||
|     check_call( | ||||
|         [ | ||||
|             "git", | ||||
|             "clone", | ||||
|             "https://github.com/ARM-software/ComputeLibrary.git", | ||||
|             "-b", | ||||
|             "v24.09", | ||||
|             "--depth", | ||||
|             "1", | ||||
|             "--shallow-submodules", | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     # NOTE: acl_install_dir already starts with "/", so build_dir expands to | ||||
|     # "//acl/build". | ||||
|     check_call( | ||||
|         ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] | ||||
|         + acl_build_flags, | ||||
|         cwd=acl_checkout_dir, | ||||
|     ) | ||||
|     # Copy these checkout directories next to the build output under /acl. | ||||
|     for d in ["arm_compute", "include", "utils", "support", "src"]: | ||||
|         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") | ||||
|  | ||||
|  | ||||
| def update_wheel(wheel_path) -> None: | ||||
|     """ | ||||
|     Bundle shared libraries into a built wheel and rewrite it in place. | ||||
|  | ||||
|     The wheel at ``wheel_path`` is unzipped into ``<folder>/tmp``, a fixed | ||||
|     list of CUDA/cuDNN/OpenMP/Fortran/ACL libraries (plus NVPL or OpenBLAS, | ||||
|     depending on the module-level ``enable_cuda`` flag set in ``__main__``) | ||||
|     is copied into ``torch/lib`` with rpath ``$ORIGIN``, and the tree is | ||||
|     zipped back over the original wheel. | ||||
|  | ||||
|     NOTE: the ``os.system`` return values are discarded, so failures of | ||||
|     unzip/patchelf/zip/rm are not detected here. ``os.mkdir`` raises if | ||||
|     ``<folder>/tmp`` or ``<folder>/cuda_wheel`` already exists. | ||||
|     """ | ||||
|     folder = os.path.dirname(wheel_path) | ||||
|     wheelname = os.path.basename(wheel_path) | ||||
|     os.mkdir(f"{folder}/tmp") | ||||
|     os.system(f"unzip {wheel_path} -d {folder}/tmp") | ||||
|     # Absolute paths of the libraries to embed (versions are hard-coded). | ||||
|     libs_to_copy = [ | ||||
|         "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||||
|         "/usr/local/cuda/lib64/libcudnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcublas.so.12", | ||||
|         "/usr/local/cuda/lib64/libcublasLt.so.12", | ||||
|         "/usr/local/cuda/lib64/libcudart.so.12", | ||||
|         "/usr/local/cuda/lib64/libcufft.so.11", | ||||
|         "/usr/local/cuda/lib64/libcusparse.so.12", | ||||
|         "/usr/local/cuda/lib64/libcusparseLt.so.0", | ||||
|         "/usr/local/cuda/lib64/libcusolver.so.11", | ||||
|         "/usr/local/cuda/lib64/libcurand.so.10", | ||||
|         "/usr/local/cuda/lib64/libnvToolsExt.so.1", | ||||
|         "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|         "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|         "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6", | ||||
|         "/usr/local/cuda/lib64/libcudnn_adv.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_cnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_graph.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_ops.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", | ||||
|         "/lib64/libgomp.so.1", | ||||
|         "/usr/lib64/libgfortran.so.5", | ||||
|         "/acl/build/libarm_compute.so", | ||||
|         "/acl/build/libarm_compute_graph.so", | ||||
|     ] | ||||
|     # BLAS backend libraries: NVPL for CUDA builds, OpenBLAS otherwise. | ||||
|     if enable_cuda: | ||||
|         libs_to_copy += [ | ||||
|             "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|         ] | ||||
|     else: | ||||
|         libs_to_copy += [ | ||||
|             "/opt/OpenBLAS/lib/libopenblas.so.0", | ||||
|         ] | ||||
|     # Copy each library into the unpacked wheel's torch/lib and force its | ||||
|     # rpath to $ORIGIN. | ||||
|     for lib_path in libs_to_copy: | ||||
|         lib_name = os.path.basename(lib_path) | ||||
|         shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}") | ||||
|         os.system( | ||||
|             f"cd {folder}/tmp/torch/lib/; " | ||||
|             f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}" | ||||
|         ) | ||||
|     # Re-zip into a scratch directory, then move the result over the original | ||||
|     # wheel and remove both scratch directories. | ||||
|     os.mkdir(f"{folder}/cuda_wheel") | ||||
|     os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") | ||||
|     shutil.move( | ||||
|         f"{folder}/cuda_wheel/{wheelname}", | ||||
|         f"{folder}/{wheelname}", | ||||
|         copy_function=shutil.copy2, | ||||
|     ) | ||||
|     os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/") | ||||
|  | ||||
|  | ||||
| def complete_wheel(folder: str) -> str: | ||||
|     """ | ||||
|     Complete wheel build and put in artifact location | ||||
|     """ | ||||
|     wheel_name = list_dir(f"/{folder}/dist")[0] | ||||
|  | ||||
|     if "pytorch" in folder and not enable_cuda: | ||||
|         print("Repairing Wheel with AuditWheel") | ||||
|         check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) | ||||
|         repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0] | ||||
|  | ||||
|         print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist") | ||||
|         os.rename( | ||||
|             f"/{folder}/wheelhouse/{repaired_wheel_name}", | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|     else: | ||||
|         repaired_wheel_name = wheel_name | ||||
|  | ||||
|     print(f"Copying {repaired_wheel_name} to artifacts") | ||||
|     shutil.copy2( | ||||
|         f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}" | ||||
|     ) | ||||
|  | ||||
|     return repaired_wheel_name | ||||
|  | ||||
|  | ||||
| def parse_arguments(): | ||||
|     """ | ||||
|     Parse inline arguments | ||||
|     """ | ||||
|     from argparse import ArgumentParser | ||||
|  | ||||
|     parser = ArgumentParser("AARCH64 wheels python CD") | ||||
|     parser.add_argument("--debug", action="store_true") | ||||
|     parser.add_argument("--build-only", action="store_true") | ||||
|     parser.add_argument("--test-only", type=str) | ||||
|     parser.add_argument("--enable-mkldnn", action="store_true") | ||||
|     parser.add_argument("--enable-cuda", action="store_true") | ||||
|     return parser.parse_args() | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     """ | ||||
|     Entry Point | ||||
|     """ | ||||
|     args = parse_arguments() | ||||
|     enable_mkldnn = args.enable_mkldnn | ||||
|     enable_cuda = args.enable_cuda | ||||
|     repo = Repository("/pytorch") | ||||
|     branch = repo.head.name | ||||
|     if branch == "HEAD": | ||||
|         branch = "master" | ||||
|  | ||||
|     print("Building PyTorch wheel") | ||||
|     build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " | ||||
|     os.system("cd /pytorch; python setup.py clean") | ||||
|  | ||||
|     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||||
|     if override_package_version is not None: | ||||
|         version = override_package_version | ||||
|         build_vars += ( | ||||
|             f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " | ||||
|         ) | ||||
|     elif branch in ["nightly", "master"]: | ||||
|         build_date = ( | ||||
|             check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") | ||||
|             .decode() | ||||
|             .replace("-", "") | ||||
|         ) | ||||
|         version = ( | ||||
|             check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] | ||||
|         ) | ||||
|         if enable_cuda: | ||||
|             desired_cuda = os.getenv("DESIRED_CUDA") | ||||
|             build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 " | ||||
|         else: | ||||
|             build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " | ||||
|     elif branch.startswith(("v1.", "v2.")): | ||||
|         build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " | ||||
|  | ||||
|     if enable_mkldnn: | ||||
|         build_ArmComputeLibrary() | ||||
|         print("build pytorch with mkldnn+acl backend") | ||||
|         build_vars += ( | ||||
|             "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " | ||||
|             "ACL_ROOT_DIR=/acl " | ||||
|             "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " | ||||
|             "ACL_INCLUDE_DIR=/acl/build " | ||||
|             "ACL_LIBRARY=/acl/build " | ||||
|         ) | ||||
|         if enable_cuda: | ||||
|             build_vars += "BLAS=NVPL " | ||||
|         else: | ||||
|             build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " | ||||
|     else: | ||||
|         print("build pytorch without mkldnn backend") | ||||
|  | ||||
|     os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel") | ||||
|     if enable_cuda: | ||||
|         print("Updating Cuda Dependency") | ||||
|         filename = os.listdir("/pytorch/dist/") | ||||
|         wheel_path = f"/pytorch/dist/{filename[0]}" | ||||
|         update_wheel(wheel_path) | ||||
|     pytorch_wheel_name = complete_wheel("/pytorch/") | ||||
|     print(f"Build Complete. Created {pytorch_wheel_name}..") | ||||
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @ -1,87 +0,0 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import os | ||||
| import shutil | ||||
| import sys | ||||
| from subprocess import check_call | ||||
| from tempfile import TemporaryDirectory | ||||
|  | ||||
| from auditwheel.elfutils import elf_file_filter | ||||
| from auditwheel.lddtree import lddtree | ||||
| from auditwheel.patcher import Patchelf | ||||
| from auditwheel.repair import copylib | ||||
| from auditwheel.wheeltools import InWheelCtx | ||||
|  | ||||
|  | ||||
| def replace_tag(filename): | ||||
|     with open(filename) as f: | ||||
|         lines = f.read().split("\\n") | ||||
|     for i, line in enumerate(lines): | ||||
|         if not line.startswith("Tag: "): | ||||
|             continue | ||||
|         lines[i] = line.replace("-linux_", "-manylinux2014_") | ||||
|         print(f"Updated tag from {line} to {lines[i]}") | ||||
|  | ||||
|     with open(filename, "w") as f: | ||||
|         f.write("\\n".join(lines)) | ||||
|  | ||||
|  | ||||
| class AlignedPatchelf(Patchelf): | ||||
|     def set_soname(self, file_name: str, new_soname: str) -> None: | ||||
|         check_call( | ||||
|             ["patchelf", "--page-size", "65536", "--set-soname", new_soname, file_name] | ||||
|         ) | ||||
|  | ||||
|     def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None: | ||||
|         check_call( | ||||
|             [ | ||||
|                 "patchelf", | ||||
|                 "--page-size", | ||||
|                 "65536", | ||||
|                 "--replace-needed", | ||||
|                 soname, | ||||
|                 new_soname, | ||||
|                 file_name, | ||||
|             ] | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def embed_library(whl_path, lib_soname, update_tag=False): | ||||
|     """ | ||||
|     Embed the shared library ``lib_soname`` into the wheel at ``whl_path``. | ||||
|  | ||||
|     For every ELF file under ``torch/lib`` that needs ``lib_soname`` from a | ||||
|     location outside the wheel, the library is copied into ``torch/lib`` | ||||
|     (once) and the file's needed entry is rewritten to the copied library's | ||||
|     soname. When ``update_tag`` is true, ``replace_tag`` is also applied to | ||||
|     every file named ``WHEEL``. The resulting wheel replaces ``whl_path``. | ||||
|  | ||||
|     NOTE(review): relies on auditwheel's InWheelCtx writing the repacked | ||||
|     wheel to ``ctx.out_wheel`` on exit and on its private ``_tmpdir`` | ||||
|     attribute -- confirm against the installed auditwheel version. | ||||
|     """ | ||||
|     patcher = AlignedPatchelf() | ||||
|     # Scratch directory that receives the rewritten wheel; it is not cleaned | ||||
|     # up explicitly in this function. | ||||
|     out_dir = TemporaryDirectory() | ||||
|     whl_name = os.path.basename(whl_path) | ||||
|     tmp_whl_name = os.path.join(out_dir.name, whl_name) | ||||
|     with InWheelCtx(whl_path) as ctx: | ||||
|         torchlib_path = os.path.join(ctx._tmpdir.name, "torch", "lib") | ||||
|         ctx.out_wheel = tmp_whl_name | ||||
|         # Filled in on the first copy so the library is only copied once. | ||||
|         new_lib_path, new_lib_soname = None, None | ||||
|         for filename, _ in elf_file_filter(ctx.iter_files()): | ||||
|             if not filename.startswith("torch/lib"): | ||||
|                 continue | ||||
|             libtree = lddtree(filename) | ||||
|             if lib_soname not in libtree["needed"]: | ||||
|                 continue | ||||
|             lib_path = libtree["libs"][lib_soname]["path"] | ||||
|             if lib_path is None: | ||||
|                 # Unresolvable library: stop scanning the remaining files. | ||||
|                 print(f"Can't embed {lib_soname} as it could not be found") | ||||
|                 break | ||||
|             # Already resolved to a copy inside the wheel; nothing to do. | ||||
|             if lib_path.startswith(torchlib_path): | ||||
|                 continue | ||||
|  | ||||
|             if new_lib_path is None: | ||||
|                 new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher) | ||||
|             patcher.replace_needed(filename, lib_soname, new_lib_soname) | ||||
|             print(f"Replacing {lib_soname} with {new_lib_soname} for {filename}") | ||||
|         if update_tag: | ||||
|             # Add manylinux2014 tag | ||||
|             for filename in ctx.iter_files(): | ||||
|                 if os.path.basename(filename) != "WHEEL": | ||||
|                     continue | ||||
|                 replace_tag(filename) | ||||
|     # Replace the original wheel with the rewritten one. | ||||
|     shutil.move(tmp_whl_name, whl_path) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     embed_library( | ||||
|         sys.argv[1], "libgomp.so.1", len(sys.argv) > 2 and sys.argv[2] == "--update-tag" | ||||
|     ) | ||||
							
								
								
									
										1
									
								
								.ci/docker/android/AndroidManifest.xml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.ci/docker/android/AndroidManifest.xml
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| <manifest package="org.pytorch.deps" /> | ||||
							
								
								
									
										66
									
								
								.ci/docker/android/build.gradle
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								.ci/docker/android/build.gradle
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,66 @@ | ||||
| buildscript { | ||||
|     ext { | ||||
|         minSdkVersion = 21 | ||||
|         targetSdkVersion = 28 | ||||
|         compileSdkVersion = 28 | ||||
|         buildToolsVersion = '28.0.3' | ||||
|  | ||||
|         coreVersion = "1.2.0" | ||||
|         extJUnitVersion = "1.1.1" | ||||
|         runnerVersion = "1.2.0" | ||||
|         rulesVersion = "1.2.0" | ||||
|         junitVersion = "4.12" | ||||
|     } | ||||
|  | ||||
|     repositories { | ||||
|         google() | ||||
|         mavenLocal() | ||||
|         mavenCentral() | ||||
|         jcenter() | ||||
|     } | ||||
|  | ||||
|     dependencies { | ||||
|         classpath 'com.android.tools.build:gradle:4.1.2' | ||||
|         classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2' | ||||
|     } | ||||
| } | ||||
|  | ||||
| repositories { | ||||
|     google() | ||||
|     jcenter() | ||||
| } | ||||
|  | ||||
| apply plugin: 'com.android.library' | ||||
|  | ||||
| android { | ||||
|     compileSdkVersion rootProject.compileSdkVersion | ||||
|     buildToolsVersion rootProject.buildToolsVersion | ||||
|  | ||||
|     defaultConfig { | ||||
|         minSdkVersion minSdkVersion | ||||
|         targetSdkVersion targetSdkVersion | ||||
|     } | ||||
|  | ||||
|     sourceSets { | ||||
|         main { | ||||
|             manifest.srcFile 'AndroidManifest.xml' | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| dependencies { | ||||
|     implementation 'com.android.support:appcompat-v7:28.0.0' | ||||
|     implementation 'androidx.appcompat:appcompat:1.0.0' | ||||
|     implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2' | ||||
|     implementation 'com.google.code.findbugs:jsr305:3.0.1' | ||||
|     implementation 'com.facebook.soloader:nativeloader:0.10.5' | ||||
|  | ||||
|     // Test libraries; versions come from the `ext` block in buildscript. | ||||
|     // Each artifact is declared once (junit and androidx.test:core were | ||||
|     // previously listed twice). | ||||
|     implementation 'junit:junit:' + rootProject.junitVersion | ||||
|     implementation 'androidx.test:core:' + rootProject.coreVersion | ||||
|     implementation 'androidx.test.ext:junit:' + rootProject.extJUnitVersion | ||||
|     implementation 'androidx.test:rules:' + rootProject.rulesVersion | ||||
|     implementation 'androidx.test:runner:' + rootProject.runnerVersion | ||||
| } | ||||
| @ -1,5 +1,5 @@ | ||||
| 0.8b | ||||
| manylinux_2_28 | ||||
| 0.6b | ||||
| manylinux_2_17 | ||||
| rocm6.2 | ||||
| 6f8cbcac8a92775291bb1ba8f514d4beb350baf4 | ||||
| e938def5d32869fe2e00aec0300f354c9f157867bebdf2e104d732b94cb238d8 | ||||
| 7f07e8a1cb1f99627eb6d77f5c0e9295c775f3c7 | ||||
| e4ab195d2bd19e939c675a13280c29714c6ef9f2cf420690da150fa0cac043b1 | ||||
|  | ||||
| @ -179,21 +179,6 @@ case "$image" in | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.13 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9) | ||||
|     CUDA_VERSION=11.8.0 | ||||
|     CUDNN_VERSION=9 | ||||
| @ -259,6 +244,16 @@ case "$image" in | ||||
|     CONDA_CMAKE=yes | ||||
|     ONNX=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3-clang9-android-ndk-r21e) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=9 | ||||
|     LLVMDEV=yes | ||||
|     PROTOBUF=yes | ||||
|     ANDROID=yes | ||||
|     ANDROID_NDK_VERSION=r21e | ||||
|     GRADLE_VERSION=6.8.3 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3.9-clang10) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=10 | ||||
| @ -291,23 +286,23 @@ case "$image" in | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     ANACONDA_PYTHON_VERSION=3.8 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.1 | ||||
|     ROCM_VERSION=6.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     ANACONDA_PYTHON_VERSION=3.8 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.2.4 | ||||
|     ROCM_VERSION=6.1 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
| @ -323,17 +318,6 @@ case "$image" in | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-2025.0-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|     pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
| @ -371,12 +355,6 @@ case "$image" in | ||||
|     CONDA_CMAKE=yes | ||||
|     VISION=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-clang18-asan) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     CLANG_VERSION=18 | ||||
|     CONDA_CMAKE=yes | ||||
|     VISION=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.9-gcc11) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
| @ -401,14 +379,6 @@ case "$image" in | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
|     HALIDE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.12-triton-cpu) | ||||
|     CUDA_VERSION=12.4 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON_CPU=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-linter) | ||||
|     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627. | ||||
| @ -430,6 +400,9 @@ case "$image" in | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     # snadampal: skipping sccache due to the following issue | ||||
|     # https://github.com/pytorch/pytorch/issues/121559 | ||||
|     SKIP_SCCACHE_INSTALL=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
|     SKIP_LLVM_SRC_BUILD_INSTALL=yes | ||||
| @ -442,6 +415,9 @@ case "$image" in | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     # snadampal: skipping sccache due to the following issue | ||||
|     # https://github.com/pytorch/pytorch/issues/121559 | ||||
|     SKIP_SCCACHE_INSTALL=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
|     SKIP_LLVM_SRC_BUILD_INSTALL=yes | ||||
| @ -518,6 +494,8 @@ docker build \ | ||||
|        --build-arg "CUDA_VERSION=${CUDA_VERSION}" \ | ||||
|        --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \ | ||||
|        --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \ | ||||
|        --build-arg "ANDROID=${ANDROID}" \ | ||||
|        --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \ | ||||
|        --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \ | ||||
|        --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \ | ||||
|        --build-arg "SWIFTSHADER=${SWIFTSHADER}" \ | ||||
| @ -525,13 +503,12 @@ docker build \ | ||||
|        --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \ | ||||
|        --build-arg "KATEX=${KATEX:-}" \ | ||||
|        --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \ | ||||
|        --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \ | ||||
|        --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906;gfx90a}" \ | ||||
|        --build-arg "IMAGE_NAME=${IMAGE_NAME}" \ | ||||
|        --build-arg "UCX_COMMIT=${UCX_COMMIT}" \ | ||||
|        --build-arg "UCC_COMMIT=${UCC_COMMIT}" \ | ||||
|        --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \ | ||||
|        --build-arg "TRITON=${TRITON}" \ | ||||
|        --build-arg "TRITON_CPU=${TRITON_CPU}" \ | ||||
|        --build-arg "ONNX=${ONNX}" \ | ||||
|        --build-arg "DOCS=${DOCS}" \ | ||||
|        --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \ | ||||
|  | ||||
| @ -108,10 +108,10 @@ ENV CMAKE_C_COMPILER cc | ||||
| ENV CMAKE_CXX_COMPILER c++ | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton.txt triton.txt | ||||
| COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt | ||||
| COPY triton_version.txt triton_version.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt | ||||
| RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt | ||||
|  | ||||
| # Install AOTriton (Early fail) | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 6f638937d64e3396793956d75ee3e14802022745 | ||||
| d519b4d3a1ffdc81b45e2b1d4733423ce0577813 | ||||
|  | ||||
| @ -1 +0,0 @@ | ||||
| c7711371cace304afe265c1ffa906415ab82fc66 | ||||
							
								
								
									
										1
									
								
								.ci/docker/ci_commit_pins/triton-rocm.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.ci/docker/ci_commit_pins/triton-rocm.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| 21eae954efa5bf584da70324b640288c3ee7aede | ||||
| @ -1 +1 @@ | ||||
| e98b6fcb8df5b44eb0d0addb6767c573d37ba024 | ||||
| 1b2f15840e0d70eec50d84c7a0575cb835524def | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 35c6c7c6284582b3f41c71c150e11b517acf074a | ||||
| dedb7bdf339a3546896d4820366ca562c586bfa0 | ||||
|  | ||||
							
								
								
									
										112
									
								
								.ci/docker/common/install_android.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										112
									
								
								.ci/docker/common/install_android.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,112 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| [ -n "${ANDROID_NDK}" ] | ||||
|  | ||||
| _https_amazon_aws=https://ossci-android.s3.amazonaws.com | ||||
|  | ||||
| apt-get update | ||||
| apt-get install -y --no-install-recommends autotools-dev autoconf unzip | ||||
| apt-get autoclean && apt-get clean | ||||
| rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||||
|  | ||||
| pushd /tmp | ||||
| curl -Os --retry 3 $_https_amazon_aws/android-ndk-${ANDROID_NDK}-linux-x86_64.zip | ||||
| popd | ||||
| _ndk_dir=/opt/ndk | ||||
| mkdir -p "$_ndk_dir" | ||||
| unzip -qo /tmp/android*.zip -d "$_ndk_dir" | ||||
| _versioned_dir=$(find "$_ndk_dir/" -mindepth 1 -maxdepth 1 -type d) | ||||
| mv "$_versioned_dir"/* "$_ndk_dir"/ | ||||
| rmdir "$_versioned_dir" | ||||
| rm -rf /tmp/* | ||||
|  | ||||
| # Install OpenJDK | ||||
| # https://hub.docker.com/r/picoded/ubuntu-openjdk-8-jdk/dockerfile/ | ||||
|  | ||||
| sudo apt-get update && \ | ||||
|     apt-get install -y openjdk-8-jdk && \ | ||||
|     apt-get install -y ant && \ | ||||
|     apt-get clean && \ | ||||
|     rm -rf /var/lib/apt/lists/* && \ | ||||
|     rm -rf /var/cache/oracle-jdk8-installer; | ||||
|  | ||||
| # Fix certificate issues, found as of | ||||
| # https://bugs.launchpad.net/ubuntu/+source/ca-certificates-java/+bug/983302 | ||||
|  | ||||
| sudo apt-get update && \ | ||||
|     apt-get install -y ca-certificates-java && \ | ||||
|     apt-get clean && \ | ||||
|     update-ca-certificates -f && \ | ||||
|     rm -rf /var/lib/apt/lists/* && \ | ||||
|     rm -rf /var/cache/oracle-jdk8-installer; | ||||
|  | ||||
| export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/ | ||||
|  | ||||
| # Installing android sdk | ||||
| # https://github.com/circleci/circleci-images/blob/staging/android/Dockerfile.m4 | ||||
|  | ||||
| _tmp_sdk_zip=/tmp/android-sdk-linux.zip | ||||
| _android_home=/opt/android/sdk | ||||
|  | ||||
| rm -rf $_android_home | ||||
| sudo mkdir -p $_android_home | ||||
| curl --silent --show-error --location --fail --retry 3 --output /tmp/android-sdk-linux.zip $_https_amazon_aws/android-sdk-linux-tools3859397-build-tools2803-2902-platforms28-29.zip | ||||
| sudo unzip -q $_tmp_sdk_zip -d $_android_home | ||||
| rm $_tmp_sdk_zip | ||||
|  | ||||
| sudo chmod -R 777 $_android_home | ||||
|  | ||||
| export ANDROID_HOME=$_android_home | ||||
| export ADB_INSTALL_TIMEOUT=120 | ||||
|  | ||||
| export PATH="${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:${PATH}" | ||||
| echo "PATH:${PATH}" | ||||
|  | ||||
| # Installing Gradle | ||||
| # Downloads gradle-${GRADLE_VERSION}-bin.zip from $_https_amazon_aws, unpacks it | ||||
| # under /opt/gradle, and puts its bin/ directory on PATH. | ||||
| echo "GRADLE_VERSION:${GRADLE_VERSION}" | ||||
| _gradle_home=/opt/gradle | ||||
| # Clear any previous install. This previously referenced "$gradle_home" (no | ||||
| # leading underscore), an unset variable, so nothing was actually removed. | ||||
| sudo rm -rf "$_gradle_home" | ||||
| sudo mkdir -p "$_gradle_home" | ||||
|  | ||||
| # Same curl flags as the Android SDK download: --fail makes an HTTP error abort | ||||
| # the script here (set -e) instead of saving an error page as gradle.zip. | ||||
| curl --silent --show-error --location --fail --output /tmp/gradle.zip --retry 3 "$_https_amazon_aws/gradle-${GRADLE_VERSION}-bin.zip" | ||||
|  | ||||
| sudo unzip -q /tmp/gradle.zip -d "$_gradle_home" | ||||
| rm /tmp/gradle.zip | ||||
|  | ||||
| sudo chmod -R 777 "$_gradle_home" | ||||
|  | ||||
| export GRADLE_HOME=$_gradle_home/gradle-$GRADLE_VERSION | ||||
| # NOTE(review): bash does not expand aliases in non-interactive scripts unless | ||||
| # expand_aliases is set, so the "gradle" call below is presumably resolved via | ||||
| # the PATH entry exported next; the alias is kept for backward compatibility. | ||||
| alias gradle="${GRADLE_HOME}/bin/gradle" | ||||
|  | ||||
| export PATH="${GRADLE_HOME}/bin/:${PATH}" | ||||
| echo "PATH:${PATH}" | ||||
|  | ||||
| # Sanity check that the freshly installed Gradle runs | ||||
| gradle --version | ||||
|  | ||||
| mkdir /var/lib/jenkins/gradledeps | ||||
| cp build.gradle /var/lib/jenkins/gradledeps | ||||
| cp AndroidManifest.xml /var/lib/jenkins/gradledeps | ||||
|  | ||||
| pushd /var/lib/jenkins | ||||
|  | ||||
| export GRADLE_LOCAL_PROPERTIES=gradledeps/local.properties | ||||
| rm -f $GRADLE_LOCAL_PROPERTIES | ||||
| echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES | ||||
| echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES | ||||
|  | ||||
| chown -R jenkins /var/lib/jenkins/gradledeps | ||||
| chgrp -R jenkins /var/lib/jenkins/gradledeps | ||||
|  | ||||
| sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble | ||||
|  | ||||
| chown -R jenkins /var/lib/jenkins/.gradle | ||||
| chgrp -R jenkins /var/lib/jenkins/.gradle | ||||
|  | ||||
| popd | ||||
|  | ||||
| rm -rf /var/lib/jenkins/.gradle/daemon | ||||
|  | ||||
| # Cache vision models used by the test | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/cache_vision_models.sh" | ||||
| @ -4,12 +4,12 @@ set -ex | ||||
|  | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| TARBALL='aotriton.tar.gz' | ||||
| TARBALL='aotriton.tar.bz2' | ||||
| # This read command always returns with exit code 1 | ||||
| read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true | ||||
| ARCH=$(uname -m) | ||||
| AOTRITON_INSTALL_PREFIX="$1" | ||||
| AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz" | ||||
| AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.bz2" | ||||
|  | ||||
| cd "${AOTRITON_INSTALL_PREFIX}" | ||||
| # Must use -L to follow redirects | ||||
|  | ||||
| @ -76,8 +76,7 @@ install_ubuntu() { | ||||
|     vim \ | ||||
|     unzip \ | ||||
|     gpg-agent \ | ||||
|     gdb \ | ||||
|     bc | ||||
|     gdb | ||||
|  | ||||
|   # Should resolve issues related to various apt package repository cert issues | ||||
|   # see: https://github.com/pytorch/pytorch/issues/65931 | ||||
|  | ||||
| @ -9,7 +9,7 @@ install_ubuntu() { | ||||
|   # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh` | ||||
|   apt-get install -y cargo | ||||
|   echo "Checking out sccache repo" | ||||
|   git clone https://github.com/mozilla/sccache -b v0.8.2 | ||||
|   git clone https://github.com/pytorch/sccache | ||||
|   cd sccache | ||||
|   echo "Building sccache" | ||||
|   cargo build --release | ||||
| @ -19,10 +19,6 @@ install_ubuntu() { | ||||
|   rm -rf sccache | ||||
|   apt-get remove -y cargo rustc | ||||
|   apt-get autoclean && apt-get clean | ||||
|  | ||||
|   echo "Downloading old sccache binary from S3 repo for PCH builds" | ||||
|   curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a | ||||
|   chmod 755 /opt/cache/bin/sccache-0.2.14a | ||||
| } | ||||
|  | ||||
| install_binary() { | ||||
| @ -39,43 +35,19 @@ export PATH="/opt/cache/bin:$PATH" | ||||
| if [ -n "$ROCM_VERSION" ]; then | ||||
|   curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache | ||||
| else | ||||
|   install_ubuntu | ||||
|   ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
|   # TODO: Install the pre-built binary from S3 as building from source | ||||
|   # https://github.com/pytorch/sccache has started failing mysteriously | ||||
|   # in which sccache server couldn't start with the following error: | ||||
|   #   sccache: error: Invalid argument (os error 22) | ||||
|   install_binary | ||||
| fi | ||||
| chmod a+x /opt/cache/bin/sccache | ||||
|  | ||||
| function write_sccache_stub() { | ||||
|   # Unset LD_PRELOAD for ps because of asan + ps issues | ||||
|   # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589 | ||||
|   if [ $1 == "gcc" ]; then | ||||
|     # Do not call sccache recursively when dumping preprocessor argument | ||||
|     # For some reason it's very important for the first cached nvcc invocation | ||||
|     cat >"/opt/cache/bin/$1" <<EOF | ||||
| #!/bin/sh | ||||
|  | ||||
| # sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively | ||||
| for arg in "\$@"; do | ||||
|   if [ "\$arg" = "-E" ]; then | ||||
|     exec $(which $1) "\$@" | ||||
|   fi | ||||
| done | ||||
|  | ||||
| if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then | ||||
|   exec sccache $(which $1) "\$@" | ||||
| else | ||||
|   exec $(which $1) "\$@" | ||||
| fi | ||||
| EOF | ||||
|   else | ||||
|     cat >"/opt/cache/bin/$1" <<EOF | ||||
| #!/bin/sh | ||||
|  | ||||
| if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then | ||||
|   exec sccache $(which $1) "\$@" | ||||
| else | ||||
|   exec $(which $1) "\$@" | ||||
| fi | ||||
| EOF | ||||
|   fi | ||||
|   printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1" | ||||
|   chmod a+x "/opt/cache/bin/$1" | ||||
| } | ||||
|  | ||||
| @ -116,7 +88,7 @@ if [ -n "$ROCM_VERSION" ]; then | ||||
|     TOPDIR=$(dirname $OLDCOMP) | ||||
|     WRAPPED="$TOPDIR/original/$COMPNAME" | ||||
|     mv "$OLDCOMP" "$WRAPPED" | ||||
|     printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" >"$OLDCOMP" | ||||
|     printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP" | ||||
|     chmod a+x "$OLDCOMP" | ||||
|   } | ||||
|  | ||||
|  | ||||
| @ -13,18 +13,11 @@ if [ -n "$CLANG_VERSION" ]; then | ||||
|   elif [[ $UBUNTU_VERSION == 22.04 ]]; then | ||||
|     # work around ubuntu apt-get conflicts | ||||
|     sudo apt-get -y -f install | ||||
|     wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  - | ||||
|     if [[ $CLANG_VERSION == 18 ]]; then | ||||
|       apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" | ||||
|     fi | ||||
|   fi | ||||
|  | ||||
|   sudo apt-get update | ||||
|   if [[ $CLANG_VERSION -ge 18 ]]; then | ||||
|     apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION" | ||||
|   else | ||||
|     apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION" | ||||
|   fi | ||||
|   apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" | ||||
|   apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION" | ||||
|  | ||||
|   # Install dev version of LLVM. | ||||
|   if [ -n "$LLVMDEV" ]; then | ||||
|  | ||||
| @ -25,8 +25,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|   mkdir -p /opt/conda | ||||
|   chown jenkins:jenkins /opt/conda | ||||
|  | ||||
|   SCRIPT_FOLDER="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
|   source "${SCRIPT_FOLDER}/common_utils.sh" | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
|   pushd /tmp | ||||
|   wget -q "${BASE_URL}/${CONDA_FILE}" | ||||
| @ -66,10 +65,23 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|  | ||||
|   # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README | ||||
|   if [[ $(uname -m) == "aarch64" ]]; then | ||||
|     conda_install "openblas==0.3.28=*openmp*" | ||||
|     CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2" | ||||
|  | ||||
|     if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then | ||||
|       NUMPY_VERSION=1.24.4 | ||||
|     else | ||||
|       NUMPY_VERSION=1.26.2 | ||||
|     fi | ||||
|   else | ||||
|     conda_install "mkl=2021.4.0 mkl-include=2021.4.0" | ||||
|     CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools" | ||||
|  | ||||
|     if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then | ||||
|       NUMPY_VERSION=1.26.0 | ||||
|     else | ||||
|       NUMPY_VERSION=1.21.2 | ||||
|     fi | ||||
|   fi | ||||
|   conda_install ${CONDA_COMMON_DEPS} | ||||
|  | ||||
|   # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source | ||||
|   # and libpython-static for torch deploy | ||||
| @ -85,13 +97,14 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|  | ||||
|   # Magma package names are concatenation of CUDA major and minor ignoring revision | ||||
|   # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89 | ||||
|   # Magma is installed from a tarball in the ossci-linux bucket into the conda env | ||||
|   if [ -n "$CUDA_VERSION" ]; then | ||||
|     ${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION}) ${ANACONDA_PYTHON_VERSION} | ||||
|     conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch | ||||
|   fi | ||||
|  | ||||
|   # Install some other packages, including those needed for Python test reporting | ||||
|   pip_install -r /opt/conda/requirements-ci.txt | ||||
|   pip_install numpy=="$NUMPY_VERSION" | ||||
|   pip_install -U scikit-learn | ||||
|  | ||||
|   if [ -n "$DOCS" ]; then | ||||
|     apt-get update | ||||
|  | ||||
| @ -7,7 +7,7 @@ PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/hea | ||||
| GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py | ||||
|  | ||||
| # Python versions to be installed in /opt/$VERSION_NO | ||||
| CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"} | ||||
| CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0"} | ||||
|  | ||||
| function check_var { | ||||
|     if [ -z "$1" ]; then | ||||
| @ -22,13 +22,6 @@ function do_cpython_build { | ||||
|     check_var $py_ver | ||||
|     check_var $py_folder | ||||
|     tar -xzf Python-$py_ver.tgz | ||||
|  | ||||
|     local additional_flags="" | ||||
|     if [ "$py_ver" == "3.13.0t" ]; then | ||||
|         additional_flags=" --disable-gil" | ||||
|         mv cpython-3.13/ cpython-3.13t/ | ||||
|     fi | ||||
|  | ||||
|     pushd $py_folder | ||||
|  | ||||
|     local prefix="/opt/_internal/cpython-${py_ver}" | ||||
| @ -44,10 +37,8 @@ function do_cpython_build { | ||||
|         local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto" | ||||
|     fi | ||||
|  | ||||
|  | ||||
|  | ||||
|     # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6 | ||||
|     CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} ${additional_flags} > /dev/null | ||||
|     CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} > /dev/null | ||||
|  | ||||
|     make -j40 > /dev/null | ||||
|     make install > /dev/null | ||||
| @ -78,14 +69,7 @@ function build_cpython { | ||||
|     check_var $py_ver | ||||
|     check_var $PYTHON_DOWNLOAD_URL | ||||
|     local py_ver_folder=$py_ver | ||||
|  | ||||
|     if [ "$py_ver" = "3.13.0t" ]; then | ||||
|         PY_VER_SHORT="3.13" | ||||
|         PYT_VER_SHORT="3.13t" | ||||
|         check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH | ||||
|         wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz | ||||
|         do_cpython_build $py_ver cpython-$PYT_VER_SHORT | ||||
|     elif [ "$py_ver" = "3.13.0" ]; then | ||||
|     if [ "$py_ver" = "3.13.0" ]; then | ||||
|         PY_VER_SHORT="3.13" | ||||
|         check_var $PYTHON_DOWNLOAD_GITHUB_BRANCH | ||||
|         wget $PYTHON_DOWNLOAD_GITHUB_BRANCH/$PY_VER_SHORT.tar.gz -O Python-$py_ver.tgz | ||||
|  | ||||
| @ -3,7 +3,7 @@ | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION=v2.21.5-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
| CUDNN_VERSION=9.1.0.70 | ||||
|  | ||||
| function install_cusparselt_040 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
| @ -38,19 +38,7 @@ function install_cusparselt_062 { | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_063 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_118 { | ||||
|     CUDNN_VERSION=9.1.0.70 | ||||
|     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0" | ||||
|     rm -rf /usr/local/cuda-11.8 /usr/local/cuda | ||||
|     # install CUDA 11.8.0 in the same container | ||||
| @ -117,8 +105,7 @@ function install_121 { | ||||
| } | ||||
|  | ||||
| function install_124 { | ||||
|   CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2" | ||||
|   rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|   # install CUDA 12.4.1 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run | ||||
| @ -150,39 +137,6 @@ function install_124 { | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function install_126 { | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|   # install CUDA 12.6.3 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run | ||||
|   chmod +x cuda_12.6.3_560.35.05_linux.run | ||||
|   ./cuda_12.6.3_560.35.05_linux.run --toolkit --silent | ||||
|   rm -f cuda_12.6.3_560.35.05_linux.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_118 { | ||||
|     echo "Pruning CUDA 11.8 and cuDNN" | ||||
|     ##################################################################################### | ||||
| @ -273,46 +227,12 @@ function prune_124 { | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune visual tools | ||||
|   # CUDA 12.1 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.4/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||||
| } | ||||
|  | ||||
| function prune_126 { | ||||
|   echo "Pruning CUDA 12.6" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.6/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| # idiomatic parameter and option handling in sh | ||||
| while test $# -gt 0 | ||||
| do | ||||
| @ -323,8 +243,6 @@ do | ||||
|         ;; | ||||
|     12.4) install_124; prune_124 | ||||
|         ;; | ||||
|     12.6) install_126; prune_126 | ||||
|         ;; | ||||
|     *) echo "bad argument $1"; exit 1 | ||||
|         ;; | ||||
|     esac | ||||
|  | ||||
| @ -4,33 +4,20 @@ | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION=v2.21.5-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
|  | ||||
| function install_cusparselt_062 { | ||||
| function install_cusparselt_052 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_063 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-sbsa-0.5.2.1-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-sbsa-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_124 { | ||||
|   CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2" | ||||
|   rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|   # install CUDA 12.4.1 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
| @ -41,10 +28,10 @@ function install_124 { | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
| @ -57,7 +44,7 @@ function install_124 { | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_062 | ||||
|   install_cusparselt_052 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
| @ -87,87 +74,18 @@ function prune_124 { | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune visual tools | ||||
|   # CUDA 12.1 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.4/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||||
| } | ||||
|  | ||||
| function install_126 { | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|   # install CUDA 12.6.3 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   ./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_126 { | ||||
|   echo "Pruning CUDA 12.6" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.6/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| # idiomatic parameter and option handling in sh | ||||
| while test $# -gt 0 | ||||
| do | ||||
|     case "$1" in | ||||
|     12.4) install_124; prune_124 | ||||
|         ;; | ||||
|     12.6) install_126; prune_126 | ||||
|         ;; | ||||
|     *) echo "bad argument $1"; exit 1 | ||||
|         ;; | ||||
|     esac | ||||
|  | ||||
| @ -4,9 +4,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then | ||||
|     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|     mkdir tmp_cudnn | ||||
|     pushd tmp_cudnn | ||||
|     if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|     if [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:2} == "11" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda11-archive" | ||||
|  | ||||
| @ -5,7 +5,7 @@ set -ex | ||||
| # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
| mkdir tmp_cusparselt && cd tmp_cusparselt | ||||
|  | ||||
| if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then | ||||
| if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-4]$ ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
|  | ||||
| @ -36,19 +36,25 @@ install_conda_dependencies() { | ||||
| } | ||||
|  | ||||
| install_pip_dependencies() { | ||||
|   pushd executorch | ||||
|   as_jenkins bash install_requirements.sh --pybind xnnpack | ||||
|   pushd executorch/.ci/docker | ||||
|   # Install PyTorch CPU build beforehand to avoid installing the much bigger CUDA | ||||
|   # binaries later, ExecuTorch only needs CPU | ||||
|   pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu | ||||
|   # Install all Python dependencies | ||||
|   pip_install -r requirements-ci.txt | ||||
|   popd | ||||
| } | ||||
|  | ||||
| setup_executorch() { | ||||
|   pushd executorch | ||||
|   # Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate | ||||
|   as_jenkins bash .ci/scripts/setup-vulkan-linux-deps.sh | ||||
|  | ||||
|   export PYTHON_EXECUTABLE=python | ||||
|   export EXECUTORCH_BUILD_PYBIND=ON | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|  | ||||
|   as_jenkins .ci/scripts/setup-linux.sh cmake || true | ||||
|   as_jenkins .ci/scripts/setup-linux.sh cmake | ||||
|   popd | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -7,20 +7,14 @@ source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
| function install_huggingface() { | ||||
|   local version | ||||
|   commit=$(get_pinned_commit huggingface) | ||||
|   pip_install pandas==2.0.3 | ||||
|   pip_install "git+https://github.com/huggingface/transformers@${commit}" | ||||
| } | ||||
|  | ||||
| function install_timm() { | ||||
|   local commit | ||||
|   commit=$(get_pinned_commit timm) | ||||
|  | ||||
|   # TODO (huydhn): There is no torchvision release on 3.13 when I write this, so | ||||
|   # I'm using nightly here instead. We just need the package to be able to install | ||||
|   # TIMM. Remove this once vision has a release on 3.13 | ||||
|   if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then | ||||
|     pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124 | ||||
|   fi | ||||
|  | ||||
|   pip_install pandas==2.0.3 | ||||
|   pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}" | ||||
|   # Clean up | ||||
|   conda_run pip uninstall -y cmake torch torchvision triton | ||||
|  | ||||
| @ -3,6 +3,8 @@ | ||||
|  | ||||
| set -eou pipefail | ||||
|  | ||||
| MAGMA_VERSION="2.5.2" | ||||
|  | ||||
| function do_install() { | ||||
|     cuda_version=$1 | ||||
|     cuda_version_nodot=${1/./} | ||||
| @ -15,7 +17,7 @@ function do_install() { | ||||
|         set -x | ||||
|         tmp_dir=$(mktemp -d) | ||||
|         pushd ${tmp_dir} | ||||
|         curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} | ||||
|         curl -OLs https://anaconda.org/pytorch/magma-cuda${cuda_version_nodot}/${MAGMA_VERSION}/download/linux-64/${magma_archive} | ||||
|         tar -xvf "${magma_archive}" | ||||
|         mkdir -p "${cuda_dir}/magma" | ||||
|         mv include "${cuda_dir}/magma/include" | ||||
|  | ||||
| @ -1,26 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script that replaces the magma install from a conda package | ||||
|  | ||||
| set -eou pipefail | ||||
|  | ||||
| function do_install() { | ||||
|     cuda_version_nodot=${1/./} | ||||
|     anaconda_python_version=$2 | ||||
|  | ||||
|     MAGMA_VERSION="2.6.1" | ||||
|     magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
|  | ||||
|     anaconda_dir="/opt/conda/envs/py_${anaconda_python_version}" | ||||
|     ( | ||||
|         set -x | ||||
|         tmp_dir=$(mktemp -d) | ||||
|         pushd ${tmp_dir} | ||||
|         curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} | ||||
|         tar -xvf "${magma_archive}" | ||||
|         mv include/* "${anaconda_dir}/include/" | ||||
|         mv lib/* "${anaconda_dir}/lib" | ||||
|         popd | ||||
|     ) | ||||
| } | ||||
|  | ||||
| do_install $1 $2 | ||||
| @ -10,21 +10,6 @@ if [[ -z $ROCM_VERSION ]]; then | ||||
|     exit 1; | ||||
| fi | ||||
|  | ||||
| IS_UBUNTU=0 | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|   ubuntu) | ||||
|     IS_UBUNTU=1 | ||||
|     ;; | ||||
|   centos|almalinux) | ||||
|     IS_UBUNTU=0 | ||||
|     ;; | ||||
|   *) | ||||
|     echo "Unable to determine OS..." | ||||
|     exit 1 | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| # To make version comparison easier, create an integer representation. | ||||
| save_IFS="$IFS" | ||||
| IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION}) | ||||
| @ -43,6 +28,12 @@ else | ||||
| fi | ||||
| ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH)) | ||||
|  | ||||
| # Install custom MIOpen + COMgr for ROCm >= 4.0.1 | ||||
| if [[ $ROCM_INT -lt 40001 ]]; then | ||||
|     echo "ROCm version < 4.0.1; will not install custom MIOpen" | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # Function to retry functions that sometimes timeout or have flaky failures | ||||
| retry () { | ||||
|     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) | ||||
| @ -60,49 +51,70 @@ else | ||||
|     ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}" | ||||
| fi | ||||
|  | ||||
| # MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues | ||||
| MIOPEN_CMAKE_COMMON_FLAGS=" | ||||
| -DMIOPEN_USE_COMGR=ON | ||||
| -DMIOPEN_BUILD_DRIVER=OFF | ||||
| " | ||||
| if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then | ||||
|     MIOPEN_BRANCH="release/rocm-rel-6.2-staging" | ||||
| else | ||||
|     echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source" | ||||
| # Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version | ||||
| if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then | ||||
|     echo "ROCm 6.2 MIOpen does not need any patches, do not build from source" | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
|  | ||||
| if [[ ${IS_UBUNTU} == 1 ]]; then | ||||
|   apt-get remove -y miopen-hip | ||||
| elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then | ||||
|     echo "ROCm 6.1 MIOpen does not need any patches, do not build from source" | ||||
|     exit 0 | ||||
| elif [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then | ||||
|     echo "ROCm 6.0 MIOpen does not need any patches, do not build from source" | ||||
|     exit 0 | ||||
| elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then | ||||
|     echo "ROCm 5.7 MIOpen does not need any patches, do not build from source" | ||||
|     exit 0 | ||||
| elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.6-staging" | ||||
| elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11" | ||||
| elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then | ||||
|     MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.4-staging" | ||||
| elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then | ||||
|     MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.3-staging" | ||||
| elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50300 ]]; then | ||||
|     MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off" | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.2-staging" | ||||
| elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then | ||||
|     MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.1-staging" | ||||
| elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then | ||||
|     MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36" | ||||
|     MIOPEN_BRANCH="release/rocm-rel-5.0-staging" | ||||
| else | ||||
|   # Workaround since almalinux manylinux image already has this and cget doesn't like that | ||||
|   rm -rf /usr/local/lib/pkgconfig/sqlite3.pc | ||||
|  | ||||
|   # Versioned package name needs regex match | ||||
|   # Use --noautoremove to prevent other rocm packages from being uninstalled | ||||
|   yum remove -y miopen-hip* --noautoremove | ||||
|     echo "Unhandled ROCM_VERSION ${ROCM_VERSION}" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| yum remove -y miopen-hip | ||||
|  | ||||
| git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH} | ||||
| pushd MIOpen | ||||
| # remove .git to save disk space since CI runner was running out | ||||
| rm -rf .git | ||||
| # Don't build CK to save docker build time | ||||
| sed -i '/composable_kernel/d' requirements.txt | ||||
| # Don't build MLIR to save docker build time | ||||
| # since we are disabling MLIR backend for MIOpen anyway | ||||
| if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then | ||||
|     sed -i '/rocMLIR/d' requirements.txt | ||||
| elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then | ||||
|     sed -i '/llvm-project-mlir/d' requirements.txt | ||||
| fi | ||||
| ## MIOpen minimum requirements | ||||
| cmake -P install_deps.cmake --minimum | ||||
|  | ||||
| # clean up since CI runner was running out of disk space | ||||
| rm -rf /tmp/* | ||||
| if [[ ${IS_UBUNTU} == 1 ]]; then | ||||
|   apt-get autoclean && apt-get clean | ||||
|   rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||||
| else | ||||
|   yum clean all | ||||
|   rm -rf /var/cache/yum | ||||
|   rm -rf /var/lib/yum/yumdb | ||||
|   rm -rf /var/lib/yum/history | ||||
| fi | ||||
| yum clean all | ||||
| rm -rf /var/cache/yum | ||||
| rm -rf /var/lib/yum/yumdb | ||||
| rm -rf /var/lib/yum/history | ||||
|  | ||||
| ## Build MIOpen | ||||
| mkdir -p build | ||||
| @ -110,7 +122,7 @@ cd build | ||||
| PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \ | ||||
|     ${MIOPEN_CMAKE_COMMON_FLAGS} \ | ||||
|     ${MIOPEN_CMAKE_DB_FLAGS} \ | ||||
|     -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}" | ||||
|     -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}" | ||||
| make MIOpen -j $(nproc) | ||||
|  | ||||
| # Build MIOpen package | ||||
| @ -119,11 +131,7 @@ make -j $(nproc) package | ||||
| # clean up since CI runner was running out of disk space | ||||
| rm -rf /usr/local/cget | ||||
|  | ||||
| if [[ ${IS_UBUNTU} == 1 ]]; then | ||||
|   sudo dpkg -i miopen-hip*.deb | ||||
| else | ||||
|   yum install -y miopen-*.rpm | ||||
| fi | ||||
| yum install -y miopen-*.rpm | ||||
|  | ||||
| popd | ||||
| rm -rf MIOpen | ||||
|  | ||||
| @ -32,7 +32,7 @@ pip_install coloredlogs packaging | ||||
|  | ||||
| pip_install onnxruntime==1.18.1 | ||||
| pip_install onnx==1.16.2 | ||||
| pip_install onnxscript==0.1.0.dev20241124 --no-deps | ||||
| pip_install onnxscript==0.1.0.dev20240831 --no-deps | ||||
| # required by onnxscript | ||||
| pip_install ml_dtypes | ||||
|  | ||||
|  | ||||
| @ -4,7 +4,7 @@ | ||||
| set -ex | ||||
|  | ||||
| cd / | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.28 --depth 1 --shallow-submodules | ||||
| git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.25 --depth 1 --shallow-submodules | ||||
|  | ||||
|  | ||||
| OPENBLAS_BUILD_FLAGS=" | ||||
|  | ||||
| @ -12,7 +12,7 @@ case "$ID" in | ||||
|     apt-get install -y libpciaccess-dev pkg-config | ||||
|     apt-get clean | ||||
|     ;; | ||||
|   centos|almalinux) | ||||
|   centos) | ||||
|     yum install -y libpciaccess-devel pkgconfig | ||||
|     ;; | ||||
|   *) | ||||
|  | ||||
| @ -3,18 +3,6 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Magma build scripts need `python` | ||||
| ln -sf /usr/bin/python3 /usr/bin/python | ||||
|  | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|   almalinux) | ||||
|     yum install -y gcc-gfortran | ||||
|     ;; | ||||
|   *) | ||||
|     echo "No preinstalls to build magma..." | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION} | ||||
|  | ||||
|  | ||||
| @ -12,14 +12,14 @@ conda_reinstall() { | ||||
|   as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $* | ||||
| } | ||||
|  | ||||
| if [ -n "${XPU_VERSION}" ]; then | ||||
| if [ -n "${ROCM_VERSION}" ]; then | ||||
|   TRITON_REPO="https://github.com/openai/triton" | ||||
|   TRITON_TEXT_FILE="triton-rocm" | ||||
| elif [ -n "${XPU_VERSION}" ]; then | ||||
|   TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton" | ||||
|   TRITON_TEXT_FILE="triton-xpu" | ||||
| elif [ -n "${TRITON_CPU}" ]; then | ||||
|   TRITON_REPO="https://github.com/triton-lang/triton-cpu" | ||||
|   TRITON_TEXT_FILE="triton-cpu" | ||||
| else | ||||
|   TRITON_REPO="https://github.com/triton-lang/triton" | ||||
|   TRITON_REPO="https://github.com/openai/triton" | ||||
|   TRITON_TEXT_FILE="triton" | ||||
| fi | ||||
|  | ||||
| @ -47,10 +47,9 @@ chown -R jenkins /var/lib/jenkins/triton | ||||
| chgrp -R jenkins /var/lib/jenkins/triton | ||||
| pushd /var/lib/jenkins/ | ||||
|  | ||||
| as_jenkins git clone --recursive ${TRITON_REPO} triton | ||||
| as_jenkins git clone ${TRITON_REPO} triton | ||||
| cd triton | ||||
| as_jenkins git checkout ${TRITON_PINNED_COMMIT} | ||||
| as_jenkins git submodule update --init --recursive | ||||
| cd python | ||||
|  | ||||
| # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527 | ||||
|  | ||||
| @ -2,13 +2,6 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Since version 24 the system ships with user 'ubuntu' that has id 1000 | ||||
| # We need a work-around to enable id 1000 usage for this script | ||||
| if [[ $UBUNTU_VERSION == 24.04 ]]; then | ||||
|     # touch is used to disable harmless error message | ||||
|     touch /var/mail/ubuntu && chown ubuntu /var/mail/ubuntu && userdel -r ubuntu | ||||
| fi | ||||
|  | ||||
| # Mirror jenkins user in container | ||||
| # jenkins user as ec2-user should have the same user-id | ||||
| echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd | ||||
|  | ||||
| @ -24,10 +24,10 @@ function install_ubuntu() { | ||||
|         | tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list | ||||
|     # To add the online network package repository for the Intel Support Packages | ||||
|     wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ | ||||
|         | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg.gpg | ||||
|     echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg.gpg] \ | ||||
|         https://apt.repos.intel.com/${XPU_REPO_NAME} all main" \ | ||||
|         | tee /etc/apt/sources.list.d/oneAPI.list | ||||
|         | gpg --dearmor > /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg | ||||
|     echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] \ | ||||
|         https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" \ | ||||
|         | tee /etc/apt/sources.list.d/intel-for-pytorch-gpu-dev.list | ||||
|  | ||||
|     # Update the packages list and repository index | ||||
|     apt-get update | ||||
| @ -41,13 +41,14 @@ function install_ubuntu() { | ||||
|         libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \ | ||||
|         libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \ | ||||
|         mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo | ||||
|     if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then | ||||
|         apt-get install -y intel-ocloc | ||||
|     fi | ||||
|     # Development Packages | ||||
|     apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev | ||||
|     # Install Intel Support Packages | ||||
|     apt-get install -y ${XPU_PACKAGES} | ||||
|     if [ -n "$XPU_VERSION" ]; then | ||||
|         apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev | ||||
|     else | ||||
|         apt-get install -y intel-for-pytorch-gpu-dev intel-pti-dev | ||||
|     fi | ||||
|  | ||||
|     # Cleanup | ||||
|     apt-get autoclean && apt-get clean | ||||
| @ -57,13 +58,13 @@ function install_ubuntu() { | ||||
| function install_rhel() { | ||||
|     . /etc/os-release | ||||
|     if [[ "${ID}" == "rhel" ]]; then | ||||
|         if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then | ||||
|         if [[ ! " 8.6 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then | ||||
|             echo "RHEL version ${VERSION_ID} not supported" | ||||
|             exit | ||||
|         fi | ||||
|     elif [[ "${ID}" == "almalinux" ]]; then | ||||
|         # Workaround for almalinux8 which is used by quay.io/pypa/manylinux_2_28_x86_64 | ||||
|         VERSION_ID="8.8" | ||||
|         VERSION_ID="8.6" | ||||
|     fi | ||||
|  | ||||
|     dnf install -y 'dnf-command(config-manager)' | ||||
| @ -71,18 +72,16 @@ function install_rhel() { | ||||
|     dnf config-manager --add-repo \ | ||||
|         https://repositories.intel.com/gpu/rhel/${VERSION_ID}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_ID}.repo | ||||
|     # To add the online network package repository for the Intel Support Packages | ||||
|     tee > /etc/yum.repos.d/oneAPI.repo << EOF | ||||
| [oneAPI] | ||||
|     tee > /etc/yum.repos.d/intel-for-pytorch-gpu-dev.repo << EOF | ||||
| [intel-for-pytorch-gpu-dev] | ||||
| name=Intel for Pytorch GPU dev repository | ||||
| baseurl=https://yum.repos.intel.com/${XPU_REPO_NAME} | ||||
| baseurl=https://yum.repos.intel.com/intel-for-pytorch-gpu-dev | ||||
| enabled=1 | ||||
| gpgcheck=1 | ||||
| repo_gpgcheck=1 | ||||
| gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
| EOF | ||||
|  | ||||
|     # Install Intel Support Packages | ||||
|     yum install -y ${XPU_PACKAGES} | ||||
|     # The xpu-smi packages | ||||
|     dnf install -y xpu-smi | ||||
|     # Compute and Media Runtimes | ||||
| @ -97,6 +96,8 @@ EOF | ||||
|     dnf install -y --refresh \ | ||||
|         intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \ | ||||
|         level-zero-devel | ||||
|     # Install Intel Support Packages | ||||
|     yum install -y intel-for-pytorch-gpu-dev intel-pti-dev | ||||
|  | ||||
|     # Cleanup | ||||
|     dnf clean all | ||||
| @ -118,7 +119,7 @@ function install_sles() { | ||||
|         https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo | ||||
|     rpm --import https://repositories.intel.com/gpu/intel-graphics.key | ||||
|     # To add the online network package repository for the Intel Support Packages | ||||
|     zypper addrepo https://yum.repos.intel.com/${XPU_REPO_NAME} oneAPI | ||||
|     zypper addrepo https://yum.repos.intel.com/intel-for-pytorch-gpu-dev intel-for-pytorch-gpu-dev | ||||
|     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
|  | ||||
|     # The xpu-smi packages | ||||
| @ -130,7 +131,7 @@ function install_sles() { | ||||
|     zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel | ||||
|  | ||||
|     # Install Intel Support Packages | ||||
|     zypper install -y ${XPU_PACKAGES} | ||||
|     zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev | ||||
|  | ||||
| } | ||||
|  | ||||
| @ -141,13 +142,6 @@ if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then | ||||
|     XPU_DRIVER_VERSION="" | ||||
| fi | ||||
|  | ||||
| XPU_REPO_NAME="intel-for-pytorch-gpu-dev" | ||||
| XPU_PACKAGES="intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9" | ||||
| if [[ "$XPU_VERSION" == "2025.0" ]]; then | ||||
|     XPU_REPO_NAME="oneapi" | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.0" | ||||
| fi | ||||
|  | ||||
| # The installation depends on the base OS | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|  | ||||
| @ -1,39 +1,47 @@ | ||||
| ARG CUDA_VERSION=12.4 | ||||
| ARG CUDA_VERSION=10.2 | ||||
| ARG BASE_TARGET=cuda${CUDA_VERSION} | ||||
| FROM amd64/almalinux:8 as base | ||||
| FROM centos:7 as base | ||||
| 
 | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| 
 | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| 
 | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| 
 | ||||
| RUN yum -y update | ||||
| RUN yum -y install epel-release | ||||
| RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain | ||||
| ARG DEVTOOLSET_VERSION=9 | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum update -y | ||||
| RUN yum install -y wget curl perl util-linux xz bzip2 git patch which unzip | ||||
| # Just add everything as a safe.directory for git since these will be used in multiple places with git | ||||
| RUN git config --global --add safe.directory '*' | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| RUN yum install -y yum-utils centos-release-scl | ||||
| RUN yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils | ||||
| # EPEL for cmake | ||||
| RUN yum --enablerepo=extras install -y epel-release | ||||
| 
 | ||||
| # cmake-3.18.4 from pip | ||||
| RUN yum install -y python3-pip && \ | ||||
|     python3 -mpip install cmake==3.18.4 && \ | ||||
|     ln -s /usr/local/bin/cmake /usr/bin/cmake3 | ||||
| # cmake | ||||
| RUN yum install -y cmake3 && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
| ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
| 
 | ||||
| RUN yum install -y autoconf aclocal automake make sudo | ||||
| RUN rm -rf /usr/local/cuda-* | ||||
| 
 | ||||
| FROM base as openssl | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
| 
 | ||||
| FROM base as patchelf | ||||
| # Install patchelf | ||||
| ADD ./common/install_patchelf.sh install_patchelf.sh | ||||
| RUN bash ./install_patchelf.sh && rm install_patchelf.sh && cp $(which patchelf) /patchelf | ||||
| 
 | ||||
| FROM base as openssl | ||||
| # Install openssl | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
| 
 | ||||
| FROM base as conda | ||||
| # Install Anaconda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| @ -41,7 +49,7 @@ RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| 
 | ||||
| # Install CUDA | ||||
| FROM base as cuda | ||||
| ARG CUDA_VERSION=12.4 | ||||
| ARG CUDA_VERSION=10.2 | ||||
| RUN rm -rf /usr/local/cuda-* | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION} | ||||
| @ -62,10 +70,6 @@ FROM cuda as cuda12.4 | ||||
| RUN bash ./install_cuda.sh 12.4 | ||||
| ENV DESIRED_CUDA=12.4 | ||||
| 
 | ||||
| FROM cuda as cuda12.6 | ||||
| RUN bash ./install_cuda.sh 12.6 | ||||
| ENV DESIRED_CUDA=12.6 | ||||
| 
 | ||||
| # Install MNIST test data | ||||
| FROM base as mnist | ||||
| ADD ./common/install_mnist.sh install_mnist.sh | ||||
| @ -75,7 +79,6 @@ FROM base as all_cuda | ||||
| COPY --from=cuda11.8  /usr/local/cuda-11.8 /usr/local/cuda-11.8 | ||||
| COPY --from=cuda12.1  /usr/local/cuda-12.1 /usr/local/cuda-12.1 | ||||
| COPY --from=cuda12.4  /usr/local/cuda-12.4 /usr/local/cuda-12.4 | ||||
| COPY --from=cuda12.6  /usr/local/cuda-12.6 /usr/local/cuda-12.6 | ||||
| 
 | ||||
| # Final step | ||||
| FROM ${BASE_TARGET} as final | ||||
| @ -88,8 +91,7 @@ COPY ./common/install_jni.sh install_jni.sh | ||||
| COPY ./java/jni.h jni.h | ||||
| RUN bash ./install_jni.sh && rm install_jni.sh | ||||
| 
 | ||||
| ENV PATH /opt/conda/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
| ENV  PATH /opt/conda/bin:$PATH | ||||
| COPY --from=mnist  /usr/local/mnist /usr/local/mnist | ||||
| RUN rm -rf /usr/local/cuda | ||||
| RUN chmod o+rw /usr/local | ||||
| @ -37,21 +37,15 @@ esac | ||||
| 
 | ||||
| ( | ||||
|   set -x | ||||
|   # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|   # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|   sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
|   sudo systemctl daemon-reload | ||||
|   sudo systemctl restart docker | ||||
| 
 | ||||
|   docker build \ | ||||
|     --target final \ | ||||
|     --progress plain \ | ||||
|     --build-arg "BASE_TARGET=${BASE_TARGET}" \ | ||||
|     --build-arg "CUDA_VERSION=${CUDA_VERSION}" \ | ||||
|     --build-arg "DEVTOOLSET_VERSION=11" \ | ||||
|     --build-arg "DEVTOOLSET_VERSION=9" \ | ||||
|     -t ${DOCKER_IMAGE_NAME} \ | ||||
|     $@ \ | ||||
|     -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \ | ||||
|     -f "${TOPDIR}/.ci/docker/conda/Dockerfile" \ | ||||
|     ${TOPDIR}/.ci/docker/ | ||||
| ) | ||||
| 
 | ||||
| @ -66,11 +66,6 @@ RUN bash ./install_cuda.sh 12.4 | ||||
| RUN bash ./install_magma.sh 12.4 | ||||
| RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda12.6 | ||||
| RUN bash ./install_cuda.sh 12.6 | ||||
| RUN bash ./install_magma.sh 12.6 | ||||
| RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
| FROM cpu as rocm | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
|  | ||||
| @ -39,7 +39,17 @@ case ${GPU_ARCH_TYPE} in | ||||
|         BASE_TARGET=rocm | ||||
|         DOCKER_TAG=rocm${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942" | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" | ||||
|         ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" | ||||
|         if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then | ||||
|             ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) | ||||
|         else | ||||
|             echo "ERROR: rocm regex failed" | ||||
|             exit 1 | ||||
|         fi | ||||
|         if [[ $ROCM_VERSION_INT -ge 60000 ]]; then | ||||
|             PYTORCH_ROCM_ARCH+=";gfx942" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" | ||||
|         ;; | ||||
|     *) | ||||
|  | ||||
| @ -25,8 +25,7 @@ ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_magma_conda.sh install_magma_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
|  | ||||
| # Install cuda and cudnn | ||||
| ARG CUDA_VERSION | ||||
|  | ||||
| @ -10,7 +10,6 @@ ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| ARG DEVTOOLSET_VERSION=9 | ||||
|  | ||||
| # Note: This patch is required since CentOS has reached EOL; | ||||
| # otherwise any yum install step will fail | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| @ -144,10 +143,6 @@ COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/ | ||||
| FROM common as cpu_final | ||||
| ARG BASE_CUDA_VERSION=10.1 | ||||
| ARG DEVTOOLSET_VERSION=9 | ||||
| # Install Anaconda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| ENV PATH /opt/conda/bin:$PATH | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| # syntax = docker/dockerfile:experimental | ||||
| ARG ROCM_VERSION=3.7 | ||||
| ARG BASE_CUDA_VERSION=11.8 | ||||
| ARG GPU_IMAGE=amd64/almalinux:8 | ||||
| FROM quay.io/pypa/manylinux_2_28_x86_64 as base | ||||
| @ -116,49 +117,30 @@ COPY --from=jni                /usr/local/include/jni.h              /usr/local/ | ||||
| FROM common as cpu_final | ||||
| ARG BASE_CUDA_VERSION=11.8 | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| # Install Anaconda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| ENV PATH /opt/conda/bin:$PATH | ||||
| # Ensure the expected devtoolset is used | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
| # Install setuptools and wheel for python 3.12/3.13 | ||||
| RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \ | ||||
|     /opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \ | ||||
|     done; | ||||
|  | ||||
|  | ||||
| # cmake-3.18.4 from pip; force in case cmake3 already exists | ||||
| # cmake-3.18.4 from pip | ||||
| RUN yum install -y python3-pip && \ | ||||
|     python3 -mpip install cmake==3.18.4 && \ | ||||
|     ln -sf /usr/local/bin/cmake /usr/bin/cmake3 | ||||
|     ln -s /usr/local/bin/cmake /usr/bin/cmake3 | ||||
|  | ||||
| FROM cpu_final as cuda_final | ||||
| RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda | ||||
| ENV PATH=/usr/local/cuda/bin:$PATH | ||||
|  | ||||
| FROM cpu_final as rocm_final | ||||
| ARG ROCM_VERSION=6.0 | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib" | ||||
| # Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path, | ||||
| # below workaround helps avoid error | ||||
| ENV ROCM_PATH /opt/rocm | ||||
| # cmake-3.28.4 from pip to get enable_language(HIP) | ||||
| # and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker | ||||
| RUN python3 -m pip install --upgrade pip && \ | ||||
|     python3 -mpip install cmake==3.28.4 | ||||
| ADD ./common/install_rocm_drm.sh install_rocm_drm.sh | ||||
| RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh | ||||
| ENV MKLROOT /opt/intel | ||||
| ADD ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
| FROM common as rocm_final | ||||
| ARG ROCM_VERSION=3.7 | ||||
| # Install ROCm | ||||
| ADD ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh | ||||
| # cmake is already installed inside the rocm base image, but both 2 and 3 exist | ||||
| # cmake3 is needed for the later MIOpen custom build, so that step is last. | ||||
| RUN yum install -y cmake3 && \ | ||||
|     rm -f /usr/bin/cmake && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
|  | ||||
| @ -168,7 +150,8 @@ ENV XPU_DRIVER_TYPE ROLLING | ||||
| # cmake-3.28.4 from pip | ||||
| RUN python3 -m pip install --upgrade pip && \ | ||||
|     python3 -mpip install cmake==3.28.4 | ||||
| # Install setuptools and wheel for python 3.13 | ||||
| RUN /opt/python/cp313-cp313/bin/python -m pip install setuptools wheel | ||||
| ADD ./common/install_xpu.sh install_xpu.sh | ||||
| ENV XPU_VERSION 2025.0 | ||||
| RUN bash ./install_xpu.sh && rm install_xpu.sh | ||||
| RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd | ||||
|  | ||||
| @ -48,11 +48,6 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/op | ||||
| # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 | ||||
| RUN git config --global --add safe.directory "*" | ||||
|  | ||||
| FROM base as openblas | ||||
| # Install openblas | ||||
| ADD ./common/install_openblas.sh install_openblas.sh | ||||
| RUN bash ./install_openblas.sh && rm install_openblas.sh | ||||
|  | ||||
| FROM base as final | ||||
|  | ||||
| # remove unnecessary python versions | ||||
| @ -60,5 +55,3 @@ RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 | ||||
| RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 | ||||
| RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 | ||||
| RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
| COPY --from=openblas     /opt/OpenBLAS/  /opt/OpenBLAS/ | ||||
| ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| @ -61,7 +61,7 @@ RUN git config --global --add safe.directory "*" | ||||
| # NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vendor, or changed | ||||
| ############################################################################### | ||||
| RUN cd ~/ \ | ||||
|   && curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-4ubuntu2_arm64.deb \ | ||||
|   && curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \ | ||||
|   && ar x ~/libgfortran-10-dev.deb \ | ||||
|   && tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \ | ||||
|   && cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ | ||||
|  | ||||
| @ -1,20 +1,17 @@ | ||||
| FROM quay.io/pypa/manylinux_2_28_s390x as base | ||||
| FROM --platform=linux/s390x docker.io/ubuntu:24.04 as base | ||||
|  | ||||
| # Language variables | ||||
| ENV LC_ALL=C.UTF-8 | ||||
| ENV LANG=C.UTF-8 | ||||
| ENV LANGUAGE=C.UTF-8 | ||||
|  | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| # Installed needed OS packages. This is to support all | ||||
| # the binary builds (torch, vision, audio, text, data) | ||||
| RUN yum -y install epel-release | ||||
| RUN yum -y update | ||||
| RUN yum install -y \ | ||||
|   sudo \ | ||||
| RUN apt update ; apt upgrade -y | ||||
| RUN apt install -y \ | ||||
|   build-essential \ | ||||
|   autoconf \ | ||||
|   automake \ | ||||
|   bison \ | ||||
|   bzip2 \ | ||||
|   curl \ | ||||
|   diffutils \ | ||||
| @ -27,40 +24,19 @@ RUN yum install -y \ | ||||
|   util-linux \ | ||||
|   wget \ | ||||
|   which \ | ||||
|   xz \ | ||||
|   yasm \ | ||||
|   xz-utils \ | ||||
|   less \ | ||||
|   zstd \ | ||||
|   libgomp \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-binutils \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ | ||||
|   cmake \ | ||||
|   rust \ | ||||
|   cargo \ | ||||
|   llvm-devel \ | ||||
|   libzstd-devel \ | ||||
|   python3.12-devel \ | ||||
|   python3.12-setuptools \ | ||||
|   python3.12-pip \ | ||||
|   python3-virtualenv \ | ||||
|   python3.12-pyyaml \ | ||||
|   python3.12-numpy \ | ||||
|   python3.12-wheel \ | ||||
|   python3.12-cryptography \ | ||||
|   blas-devel \ | ||||
|   openblas-devel \ | ||||
|   lapack-devel \ | ||||
|   atlas-devel \ | ||||
|   libjpeg-devel \ | ||||
|   libxslt-devel \ | ||||
|   libxml2-devel \ | ||||
|   openssl-devel \ | ||||
|   valgrind | ||||
|  | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|   python3 \ | ||||
|   python3-dev \ | ||||
|   python3-setuptools \ | ||||
|   python3-yaml \ | ||||
|   python3-typing-extensions \ | ||||
|   libblas-dev \ | ||||
|   libopenblas-dev \ | ||||
|   liblapack-dev \ | ||||
|   libatlas-base-dev | ||||
|  | ||||
| # git236+ would refuse to run git commands in repos owned by other users | ||||
| # Which causes version check to fail, as pytorch repo is bind-mounted into the image | ||||
| @ -68,8 +44,14 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op | ||||
| # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 | ||||
| RUN git config --global --add safe.directory "*" | ||||
|  | ||||
| # installed python doesn't have development parts. Rebuild it from scratch | ||||
| RUN /bin/rm -rf /opt/_internal /opt/python /usr/local/*/* | ||||
| FROM base as openssl | ||||
| # Install openssl (this must precede `build python` step) | ||||
| # (In order to have a proper SSL module, Python is compiled | ||||
| # against a recent openssl [see env vars above], which is linked | ||||
| # statically. We delete openssl afterwards.) | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
| ENV SSL_CERT_FILE=/opt/_internal/certs.pem | ||||
|  | ||||
| # EPEL for cmake | ||||
| FROM base as patchelf | ||||
| @ -82,43 +64,10 @@ FROM patchelf as python | ||||
| # build python | ||||
| COPY manywheel/build_scripts /build_scripts | ||||
| ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh | ||||
| ENV SSL_CERT_FILE= | ||||
| RUN bash build_scripts/build.sh && rm -r build_scripts | ||||
|  | ||||
| FROM base as final | ||||
| FROM openssl as final | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=python             /opt/python/cp39-cp39/bin/auditwheel  /usr/local/bin/auditwheel | ||||
| COPY --from=python             /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel | ||||
| COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf | ||||
|  | ||||
| RUN alternatives --set python /usr/bin/python3.12 | ||||
| RUN alternatives --set python3 /usr/bin/python3.12 | ||||
|  | ||||
| RUN pip-3.12 install typing_extensions | ||||
|  | ||||
| ENTRYPOINT [] | ||||
| CMD ["/bin/bash"] | ||||
|  | ||||
| # install test dependencies: | ||||
| # - grpcio requires system openssl, bundled crypto fails to build | ||||
| # - ml_dtypes 0.4.0 requires some fixes provided in later commits to build | ||||
| RUN dnf install -y \ | ||||
|   protobuf-devel \ | ||||
|   protobuf-c-devel \ | ||||
|   protobuf-lite-devel \ | ||||
|   wget \ | ||||
|   patch | ||||
|  | ||||
| RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4 | ||||
| RUN cd ~ && \ | ||||
|   git clone https://github.com/jax-ml/ml_dtypes && \ | ||||
|   cd ml_dtypes && \ | ||||
|   git checkout v0.4.0 && \ | ||||
|   git submodule update --init --recursive && \ | ||||
|   wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \ | ||||
|   wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \ | ||||
|   patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \ | ||||
|   patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \ | ||||
|   python3 setup.py bdist_wheel && \ | ||||
|   pip3 install dist/*.whl && \ | ||||
|   rm -rf ml_dtypes | ||||
|  | ||||
| @ -61,7 +61,7 @@ case ${GPU_ARCH_TYPE} in | ||||
|     cpu-s390x) | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-s390x | ||||
|         GPU_IMAGE=s390x/almalinux:8 | ||||
|         GPU_IMAGE=redhat/ubi9 | ||||
|         DOCKER_GPU_BUILD_ARG="" | ||||
|         MANY_LINUX_VERSION="s390x" | ||||
|         ;; | ||||
| @ -87,18 +87,22 @@ case ${GPU_ARCH_TYPE} in | ||||
|         MANY_LINUX_VERSION="aarch64" | ||||
|         DOCKERFILE_SUFFIX="_cuda_aarch64" | ||||
|         ;; | ||||
|     rocm|rocm-manylinux_2_28) | ||||
|     rocm) | ||||
|         TARGET=rocm_final | ||||
|         DOCKER_TAG=rocm${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete | ||||
|         DEVTOOLSET_VERSION="9" | ||||
|         if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then | ||||
|             MANY_LINUX_VERSION="2_28" | ||||
|             DEVTOOLSET_VERSION="11" | ||||
|             GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100" | ||||
|         ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)" | ||||
|         if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then | ||||
|             ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0})) | ||||
|         else | ||||
|             echo "ERROR: rocm regex failed" | ||||
|             exit 1 | ||||
|         fi | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" | ||||
|         if [[ $ROCM_VERSION_INT -ge 60000 ]]; then | ||||
|             PYTORCH_ROCM_ARCH+=";gfx942" | ||||
|         fi | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9" | ||||
|         ;; | ||||
|     xpu) | ||||
|         TARGET=xpu_final | ||||
| @ -120,16 +124,7 @@ if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then | ||||
| fi | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|     if [ "$(uname -m)" != "s390x" ]; then | ||||
|         # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|         # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|         sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
|         sudo systemctl daemon-reload | ||||
|         sudo systemctl restart docker | ||||
|     fi | ||||
|  | ||||
|     DOCKER_BUILDKIT=1 docker build  \ | ||||
|     DOCKER_BUILDKIT=1 docker build \ | ||||
|         ${DOCKER_GPU_BUILD_ARG} \ | ||||
|         --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|         --target "${TARGET}" \ | ||||
|  | ||||
| @ -16,27 +16,37 @@ CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131 | ||||
| AUTOCONF_ROOT=autoconf-2.69 | ||||
| AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969 | ||||
|  | ||||
| # Dependencies for compiling Python that we want to remove from | ||||
| # the final image after compiling Python | ||||
| PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel" | ||||
|  | ||||
| if [ "$(uname -m)" != "s390x" ] ; then | ||||
|     PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} db4-devel" | ||||
| else | ||||
|     PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} libdb-devel" | ||||
| fi | ||||
|  | ||||
| # Libraries that are allowed as part of the manylinux1 profile | ||||
| MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel" | ||||
|  | ||||
| # Get build utilities | ||||
| MY_DIR=$(dirname "${BASH_SOURCE[0]}") | ||||
| source $MY_DIR/build_utils.sh | ||||
|  | ||||
| # Development tools and libraries | ||||
| yum -y install bzip2 make git patch unzip bison yasm diffutils \ | ||||
|     automake which file \ | ||||
|     ${PYTHON_COMPILE_DEPS} | ||||
| if [ "$(uname -m)" != "s390x" ] ; then | ||||
|     # Dependencies for compiling Python that we want to remove from | ||||
|     # the final image after compiling Python | ||||
|     PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel" | ||||
|  | ||||
|     # Libraries that are allowed as part of the manylinux1 profile | ||||
|     MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel" | ||||
|  | ||||
|     # Development tools and libraries | ||||
|     yum -y install bzip2 make git patch unzip bison yasm diffutils \ | ||||
|         automake which file cmake28 \ | ||||
|         kernel-devel-`uname -r` \ | ||||
|         ${PYTHON_COMPILE_DEPS} | ||||
| else | ||||
|     # Dependencies for compiling Python that we want to remove from | ||||
|     # the final image after compiling Python | ||||
|     PYTHON_COMPILE_DEPS="zlib1g-dev libbz2-dev libncurses-dev libsqlite3-dev libdb-dev libpcap-dev liblzma-dev libffi-dev" | ||||
|  | ||||
|     # Libraries that are allowed as part of the manylinux1 profile | ||||
|     MANYLINUX1_DEPS="libglib2.0-dev libX11-dev libncurses-dev" | ||||
|  | ||||
|     # Development tools and libraries | ||||
|     apt install -y bzip2 make git patch unzip diffutils \ | ||||
|         automake which file cmake \ | ||||
|         linux-headers-virtual \ | ||||
|         ${PYTHON_COMPILE_DEPS} | ||||
| fi | ||||
|  | ||||
| # Install newest autoconf | ||||
| build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH | ||||
| @ -82,13 +92,16 @@ ln -s $PY39_BIN/auditwheel /usr/local/bin/auditwheel | ||||
|  | ||||
| # Clean up development headers and other unnecessary stuff for | ||||
| # final image | ||||
| yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \ | ||||
|     avahi freetype bitstream-vera-fonts \ | ||||
|     ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1 | ||||
| yum -y install ${MANYLINUX1_DEPS} | ||||
| yum -y clean all > /dev/null 2>&1 | ||||
| yum list installed | ||||
|  | ||||
| if [ "$(uname -m)" != "s390x" ] ; then | ||||
|     yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \ | ||||
|         avahi freetype bitstream-vera-fonts \ | ||||
|         ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1 | ||||
|     yum -y install ${MANYLINUX1_DEPS} | ||||
|     yum -y clean all > /dev/null 2>&1 | ||||
|     yum list installed | ||||
| else | ||||
|     apt purge -y ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1 | ||||
| fi | ||||
| # we don't need libpython*.a, and they're many megabytes | ||||
| find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f | ||||
| # Strip what we can -- and ignore errors, because this just attempts to strip | ||||
|  | ||||
| @ -1,12 +1,10 @@ | ||||
| # cf. https://github.com/pypa/manylinux/issues/53 | ||||
|  | ||||
| import sys | ||||
| from urllib.request import urlopen | ||||
|  | ||||
|  | ||||
| GOOD_SSL = "https://google.com" | ||||
| BAD_SSL = "https://self-signed.badssl.com" | ||||
|  | ||||
| import sys | ||||
|  | ||||
|  | ||||
| print("Testing SSL certificate checking for Python:", sys.version) | ||||
|  | ||||
| @ -14,8 +12,14 @@ if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4): | ||||
|     print("This version never checks SSL certs; skipping tests") | ||||
|     sys.exit(0) | ||||
|  | ||||
| if sys.version_info[0] >= 3: | ||||
|     from urllib.request import urlopen | ||||
|  | ||||
| EXC = OSError | ||||
|     EXC = OSError | ||||
| else: | ||||
|     from urllib import urlopen | ||||
|  | ||||
|     EXC = IOError | ||||
|  | ||||
| print(f"Connecting to {GOOD_SSL} should work") | ||||
| urlopen(GOOD_SSL) | ||||
|  | ||||
| @ -5,7 +5,7 @@ | ||||
| #Pinned versions: 1.6 | ||||
| #test that import: | ||||
|  | ||||
| boto3==1.35.42 | ||||
| boto3==1.19.12 | ||||
| #Description: AWS SDK for python | ||||
| #Pinned versions: 1.19.12, 1.16.34 | ||||
| #test that import: | ||||
| @ -30,14 +30,9 @@ dill==0.3.7 | ||||
| #Pinned versions: 0.3.7 | ||||
| #test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py | ||||
|  | ||||
| expecttest==0.2.1 | ||||
| expecttest==0.1.6 | ||||
| #Description: method for writing tests where test framework auto populates | ||||
| # the expected output based on previous runs | ||||
| #Pinned versions: 0.2.1 | ||||
| #test that import: | ||||
|  | ||||
| fbscribelogger==0.1.7 | ||||
| #Description: write to scribe from authenticated jobs on CI | ||||
| #Pinned versions: 0.1.6 | ||||
| #test that import: | ||||
|  | ||||
| @ -90,7 +85,7 @@ librosa>=0.6.2 ; python_version < "3.11" | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| mypy==1.13.0 | ||||
| mypy==1.10.0 | ||||
| # Pin MyPy version because new errors are likely to appear with each release | ||||
| #Description: linter | ||||
| #Pinned versions: 1.10.0 | ||||
| @ -118,7 +113,7 @@ numba==0.55.2 ; python_version == "3.10" | ||||
|  | ||||
| #numpy | ||||
| #Description: Provides N-dimensional arrays and linear algebra | ||||
| #Pinned versions: 1.26.2 | ||||
| #Pinned versions: 1.20 | ||||
| #test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py, | ||||
| #test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py, | ||||
| #test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py, | ||||
| @ -128,12 +123,6 @@ numba==0.55.2 ; python_version == "3.10" | ||||
| #test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py, | ||||
| #test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py, | ||||
| #test_binary_ufuncs.py | ||||
| numpy==1.22.4; python_version == "3.9" or python_version == "3.10" | ||||
| numpy==1.26.2; python_version == "3.11" or python_version == "3.12" | ||||
| numpy==2.1.2; python_version >= "3.13" | ||||
|  | ||||
| pandas==2.0.3; python_version < "3.13" | ||||
| pandas==2.2.3; python_version >= "3.13" | ||||
|  | ||||
| #onnxruntime | ||||
| #Description: scoring engine for Open Neural Network Exchange (ONNX) models | ||||
| @ -145,9 +134,9 @@ opt-einsum==3.3 | ||||
| #Pinned versions: 3.3 | ||||
| #test that import: test_linalg.py | ||||
|  | ||||
| optree==0.13.0 | ||||
| optree==0.12.1 | ||||
| #Description: A library for tree manipulation | ||||
| #Pinned versions: 0.13.0 | ||||
| #Pinned versions: 0.12.1 | ||||
| #test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py, | ||||
| #test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py, | ||||
| #common_utils.py, test_eager_transforms.py, test_python_dispatch.py, | ||||
| @ -158,7 +147,7 @@ optree==0.13.0 | ||||
| #test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py, | ||||
| #test_fake_tensor.py, test_mps.py | ||||
|  | ||||
| pillow==11.0.0 | ||||
| pillow==10.3.0 | ||||
| #Description:  Python Imaging Library fork | ||||
| #Pinned versions: 10.3.0 | ||||
| #test that import: | ||||
| @ -193,11 +182,6 @@ pytest-rerunfailures>=10.3 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| pytest-subtests==0.13.1 | ||||
| #Description: plugin for subtest support | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| #pytest-benchmark | ||||
| #Description: fixture for benchmarking code | ||||
| #Pinned versions: 3.2.3 | ||||
| @ -245,7 +229,7 @@ scikit-image==0.22.0 ; python_version >= "3.10" | ||||
| #test that import: | ||||
|  | ||||
| scipy==1.10.1 ; python_version <= "3.11" | ||||
| scipy==1.14.1 ; python_version >= "3.12" | ||||
| scipy==1.12.0 ; python_version == "3.12" | ||||
| # Pin SciPy because of failing distribution tests (see #60347) | ||||
| #Description: scientific python | ||||
| #Pinned versions: 1.10.1 | ||||
| @ -264,7 +248,7 @@ tb-nightly==2.13.0a20230426 | ||||
| #test that import: | ||||
|  | ||||
| # needed by torchgen utils | ||||
| typing-extensions>=4.10.0 | ||||
| typing-extensions | ||||
| #Description: type hints for python | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -289,6 +273,11 @@ redis>=4.0.0 | ||||
| #Description: redis database | ||||
| #test that import: anything that tests OSS caching/mocking (inductor/test_codecache.py, inductor/test_max_autotune.py) | ||||
|  | ||||
| rockset==1.0.3 | ||||
| #Description: queries Rockset | ||||
| #Pinned versions: 1.0.3 | ||||
| #test that import: | ||||
|  | ||||
| ghstack==0.8.0 | ||||
| #Description: ghstack tool | ||||
| #Pinned versions: 0.8.0 | ||||
| @ -309,32 +298,32 @@ z3-solver==4.12.2.0 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| tensorboard==2.13.0 ; python_version < "3.13" | ||||
| tensorboard==2.18.0 ; python_version >= "3.13" | ||||
| tensorboard==2.13.0 | ||||
| #Description: Also included in .ci/docker/requirements-docs.txt | ||||
| #Pinned versions: | ||||
| #test that import: test_tensorboard | ||||
|  | ||||
| pywavelets==1.4.1 ; python_version < "3.12" | ||||
| pywavelets==1.7.0 ; python_version >= "3.12" | ||||
| pywavelets==1.5.0 ; python_version >= "3.12" | ||||
| #Description: This is a requirement of scikit-image, we need to pin | ||||
| # it here because 1.5.0 conflicts with numpy 1.21.2 used in CI | ||||
| #Pinned versions: 1.4.1 | ||||
| #test that import: | ||||
|  | ||||
| lxml==5.3.0 | ||||
| lxml==5.0.0 | ||||
| #Description: This is a requirement of unittest-xml-reporting | ||||
|  | ||||
| # Python-3.9 binaries | ||||
|  | ||||
| PyGithub==2.3.0 | ||||
|  | ||||
| sympy==1.12.1 ; python_version == "3.8" | ||||
| sympy==1.13.1 ; python_version >= "3.9" | ||||
| #Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| onnx==1.17.0 | ||||
| onnx==1.16.1 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -343,31 +332,3 @@ onnxscript==0.1.0.dev20240817 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| parameterized==0.8.1 | ||||
| #Description: Parameterizes unittests, both the tests themselves and the entire testing class | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| #Description: required for testing torch/distributed/_tools/sac_estimator.py | ||||
| #Pinned versions: 1.24.0 | ||||
| #test that import: test_sac_estimator.py | ||||
|  | ||||
| pwlf==2.2.1 ; python_version >= "3.8" | ||||
| #Description: required for testing torch/distributed/_tools/sac_estimator.py | ||||
| #Pinned versions: 2.2.1 | ||||
| #test that import: test_sac_estimator.py | ||||
|  | ||||
|  | ||||
| # To build PyTorch itself | ||||
| astunparse | ||||
| PyYAML | ||||
| setuptools | ||||
|  | ||||
| ninja==1.11.1 ; platform_machine == "aarch64" | ||||
| scons==4.5.2 ; platform_machine == "aarch64" | ||||
|  | ||||
| pulp==2.9.0 ; python_version >= "3.8" | ||||
| #Description: required for testing ilp formulation under torch/distributed/_tools | ||||
| #Pinned versions: 2.9.0 | ||||
| #test that import: test_sac_ilp.py | ||||
|  | ||||
| @ -14,8 +14,7 @@ matplotlib==3.5.3 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 3.5.3 | ||||
|  | ||||
| tensorboard==2.13.0 ; python_version < "3.13" | ||||
| tensorboard==2.18.0 ; python_version >= "3.13" | ||||
| tensorboard==2.13.0 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 2.13.0 | ||||
|  | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 3.2.0 | ||||
| 3.0.0 | ||||
|  | ||||
| @ -30,8 +30,7 @@ ARG CONDA_CMAKE | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_magma_conda.sh install_magma_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
|  | ||||
| # Install gcc | ||||
| ARG GCC_VERSION | ||||
| @ -81,8 +80,6 @@ RUN bash ./install_openssl.sh | ||||
| ENV OPENSSL_DIR /opt/openssl | ||||
|  | ||||
| ARG INDUCTOR_BENCHMARKS | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/huggingface.txt huggingface.txt | ||||
|  | ||||
| @ -68,8 +68,6 @@ RUN rm install_rocm.sh | ||||
| COPY ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh | ||||
| RUN rm install_rocm_magma.sh | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
| ENV ROCM_PATH /opt/rocm | ||||
| ENV PATH /opt/rocm/bin:$PATH | ||||
| ENV PATH /opt/rocm/hcc/bin:$PATH | ||||
| @ -102,10 +100,10 @@ ARG TRITON | ||||
| # try to reach out to S3, which docker build runners don't have access | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton.txt triton.txt | ||||
| COPY ci_commit_pins/triton-rocm.txt triton-rocm.txt | ||||
| COPY triton_version.txt triton_version.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt | ||||
| RUN rm install_triton.sh common_utils.sh triton-rocm.txt triton_version.txt | ||||
|  | ||||
| # Install AOTriton | ||||
| COPY ./aotriton_version.txt aotriton_version.txt | ||||
| @ -123,8 +121,5 @@ RUN bash ./install_cache.sh && rm install_cache.sh | ||||
| ARG BUILD_ENVIRONMENT | ||||
| ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT} | ||||
|  | ||||
| # Install LLVM dev version (Defined in the pytorch/builder github repository) | ||||
| COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm | ||||
|  | ||||
| USER jenkins | ||||
| CMD ["bash"] | ||||
|  | ||||
| @ -36,8 +36,7 @@ ENV DOCS=$DOCS | ||||
| COPY requirements-ci.txt requirements-docs.txt /opt/conda/ | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_magma_conda.sh install_magma_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt | ||||
| RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi | ||||
|  | ||||
| # Install gcc | ||||
| @ -88,6 +87,19 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi | ||||
| RUN rm install_vision.sh cache_vision_models.sh common_utils.sh | ||||
| ENV INSTALLED_VISION ${VISION} | ||||
|  | ||||
| # (optional) Install Android NDK | ||||
| ARG ANDROID | ||||
| ARG ANDROID_NDK | ||||
| ARG GRADLE_VERSION | ||||
| COPY ./common/install_android.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| COPY ./android/AndroidManifest.xml AndroidManifest.xml | ||||
| COPY ./android/build.gradle build.gradle | ||||
| RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi | ||||
| RUN rm install_android.sh cache_vision_models.sh common_utils.sh | ||||
| RUN rm AndroidManifest.xml | ||||
| RUN rm build.gradle | ||||
| ENV INSTALLED_ANDROID ${ANDROID} | ||||
|  | ||||
| # (optional) Install Vulkan SDK | ||||
| ARG VULKAN_SDK_VERSION | ||||
| COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh | ||||
| @ -135,13 +147,6 @@ COPY ci_commit_pins/triton.txt triton.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt | ||||
|  | ||||
| ARG TRITON_CPU | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt | ||||
| RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton-cpu.txt | ||||
|  | ||||
| ARG EXECUTORCH | ||||
| # Build and install executorch | ||||
| COPY ./common/install_executorch.sh install_executorch.sh | ||||
|  | ||||
| @ -1,10 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| # This is mostly just a shim to manywheel/build.sh | ||||
| # TODO: Make this a dedicated script to build just libtorch | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||||
|  | ||||
| USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh | ||||
							
								
								
									
										2
									
								
								.ci/magma/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.ci/magma/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,2 +0,0 @@ | ||||
| output/ | ||||
| magma-cuda*/ | ||||
| @ -1,48 +0,0 @@ | ||||
| SHELL=/usr/bin/env bash | ||||
|  | ||||
| DOCKER_CMD ?= docker | ||||
| DESIRED_CUDA ?= 11.8 | ||||
| DESIRED_CUDA_SHORT = $(subst .,,$(DESIRED_CUDA)) | ||||
| PACKAGE_NAME = magma-cuda | ||||
| CUDA_ARCH_LIST ?= -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 | ||||
|  | ||||
| DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \ | ||||
| 	-w /builder \ | ||||
| 	-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \ | ||||
| 	-e DESIRED_CUDA=${DESIRED_CUDA} \ | ||||
| 	-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \ | ||||
| 	"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \ | ||||
| 	magma/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-cuda126 | ||||
| all: magma-cuda124 | ||||
| all: magma-cuda121 | ||||
| all: magma-cuda118 | ||||
|  | ||||
| .PHONY: | ||||
| clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-cuda126 | ||||
| magma-cuda126: DESIRED_CUDA := 12.6 | ||||
| magma-cuda126: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda124 | ||||
| magma-cuda124: DESIRED_CUDA := 12.4 | ||||
| magma-cuda124: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda121 | ||||
| magma-cuda121: DESIRED_CUDA := 12.1 | ||||
| magma-cuda121: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda118 | ||||
| magma-cuda118: DESIRED_CUDA := 11.8 | ||||
| magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37 | ||||
| magma-cuda118: | ||||
| 	$(DOCKER_RUN) | ||||
| @ -1,50 +0,0 @@ | ||||
| # Magma | ||||
|  | ||||
| This folder contains the scripts and configurations to build magma, statically linked for various versions of CUDA. | ||||
|  | ||||
| ## Building | ||||
|  | ||||
| Look in the `Makefile` for available targets to build. To build any target, for example `magma-cuda118`, run: | ||||
|  | ||||
| ``` | ||||
| # Using `docker` | ||||
| make magma-cuda118 | ||||
|  | ||||
| # Using `podman` | ||||
| DOCKER_CMD=podman make magma-cuda118 | ||||
| ``` | ||||
|  | ||||
| This spawns a `pytorch/manylinux-builder:cuda<version>-main` docker image, which has the required `devtoolset` and CUDA versions installed. | ||||
| Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which packages the necessary files | ||||
| into a tarball, with the following structure: | ||||
|  | ||||
| ``` | ||||
| . | ||||
| ├── include       # header files | ||||
| ├── lib           # libmagma.a | ||||
| ├── info | ||||
| │   ├── licenses  # license file | ||||
| │   └── recipe    # build script and patches | ||||
| ``` | ||||
|  | ||||
| More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the CUDA version. | ||||
| The resulting tarballs are written to the `output/linux-64` folder. | ||||
|  | ||||
|  | ||||
| ## Pushing | ||||
|  | ||||
| Packages can be uploaded to an S3 bucket using: | ||||
|  | ||||
| ``` | ||||
| aws s3 cp output/*/magma-cuda*.bz2 <bucket-with-path> | ||||
| ``` | ||||
|  | ||||
| If you do not have upload permissions, please ping @seemethere or @soumith to gain access. | ||||
|  | ||||
| ## New versions | ||||
|  | ||||
| New CUDA versions can be added by creating a new make target with the next desired version. For CUDA version NN.n, the target should be named `magma-cudaNNn`. | ||||
|  | ||||
| Make sure to edit the appropriate environment variables (e.g., DESIRED_CUDA, CUDA_ARCH_LIST) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct. | ||||
|  | ||||
| New patches can be added by editing `Makefile` and `build_magma.sh`, following the way `getrf_nbparam.patch` is handled. | ||||
| @ -1,50 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -eou pipefail | ||||
|  | ||||
| # Environment variables | ||||
| # The script expects DESIRED_CUDA and PACKAGE_NAME to be set | ||||
| ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" | ||||
| MAGMA_VERSION=2.6.1 | ||||
|  | ||||
| # Folders for the build | ||||
| PACKAGE_FILES=${ROOT_DIR}/magma/package_files # source patches and metadata | ||||
| PACKAGE_DIR=${ROOT_DIR}/magma/${PACKAGE_NAME} # build workspace | ||||
| PACKAGE_OUTPUT=${ROOT_DIR}/magma/output # where tarballs are stored | ||||
| PACKAGE_BUILD=${PACKAGE_DIR}/build # where the content of the tarball is prepared | ||||
| PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe | ||||
| PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses | ||||
| mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE} | ||||
|  | ||||
| # Fetch magma sources and verify checksum | ||||
| pushd ${PACKAGE_DIR} | ||||
| curl -LO http://icl.utk.edu/projectsfiles/magma/downloads/magma-${MAGMA_VERSION}.tar.gz | ||||
| tar zxf magma-${MAGMA_VERSION}.tar.gz | ||||
| sha256sum --check < ${PACKAGE_FILES}/magma-${MAGMA_VERSION}.sha256 | ||||
| popd | ||||
|  | ||||
| # Apply patches and build | ||||
| pushd ${PACKAGE_DIR}/magma-${MAGMA_VERSION} | ||||
| patch < ${PACKAGE_FILES}/CMake.patch | ||||
| patch < ${PACKAGE_FILES}/cmakelists.patch | ||||
| patch -p0 < ${PACKAGE_FILES}/thread_queue.patch | ||||
| patch -p1 < ${PACKAGE_FILES}/getrf_shfl.patch | ||||
| patch -p1 < ${PACKAGE_FILES}/getrf_nbparam.patch | ||||
| # The build.sh script expects to be executed from the sources root folder | ||||
| INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh | ||||
| popd | ||||
|  | ||||
| # Package recipe, license and tarball | ||||
| # Folder and package name are backward compatible for the build workflow | ||||
| cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh | ||||
| cp ${PACKAGE_FILES}/thread_queue.patch ${PACKAGE_RECIPE}/thread_queue.patch | ||||
| cp ${PACKAGE_FILES}/cmakelists.patch ${PACKAGE_RECIPE}/cmakelists.patch | ||||
| cp ${PACKAGE_FILES}/getrf_shfl.patch ${PACKAGE_RECIPE}/getrf_shfl.patch | ||||
| cp ${PACKAGE_FILES}/getrf_nbparam.patch ${PACKAGE_RECIPE}/getrf_nbparam.patch | ||||
| cp ${PACKAGE_FILES}/CMake.patch ${PACKAGE_RECIPE}/CMake.patch | ||||
| cp ${PACKAGE_FILES}/magma-${MAGMA_VERSION}.sha256 ${PACKAGE_RECIPE}/magma-${MAGMA_VERSION}.sha256 | ||||
| cp ${PACKAGE_DIR}/magma-${MAGMA_VERSION}/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT | ||||
| pushd ${PACKAGE_BUILD} | ||||
| tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info | ||||
| echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 | ||||
| popd | ||||
| @ -1,40 +0,0 @@ | ||||
| --- CMake.src.cuda	2023-03-29 10:05:32.136954140 +0000 | ||||
| +++ CMake.src.cuda	2023-03-29 10:05:50.281318043 +0000 | ||||
| @@ -283,10 +283,10 @@ | ||||
|  magmablas/zgeadd.cu | ||||
|  magmablas/zgeadd2.cu | ||||
|  magmablas/zgeam.cu | ||||
| -magmablas/zgemm_fermi.cu | ||||
| +#magmablas/zgemm_fermi.cu | ||||
|  magmablas/zgemm_reduce.cu | ||||
|  magmablas/zgemv_conj.cu | ||||
| -magmablas/zgemv_fermi.cu | ||||
| +#magmablas/zgemv_fermi.cu | ||||
|  magmablas/zgerbt.cu | ||||
|  magmablas/zgerbt_kernels.cu | ||||
|  magmablas/zgetmatrix_transpose.cpp | ||||
| @@ -1009,18 +1009,18 @@ | ||||
|  magmablas/sgeam.cu | ||||
|  magmablas/dgeam.cu | ||||
|  magmablas/cgeam.cu | ||||
| -magmablas/sgemm_fermi.cu | ||||
| -magmablas/dgemm_fermi.cu | ||||
| -magmablas/cgemm_fermi.cu | ||||
| +#magmablas/sgemm_fermi.cu | ||||
| +#magmablas/dgemm_fermi.cu | ||||
| +#magmablas/cgemm_fermi.cu | ||||
|  magmablas/sgemm_reduce.cu | ||||
|  magmablas/dgemm_reduce.cu | ||||
|  magmablas/cgemm_reduce.cu | ||||
|  magmablas/sgemv_conj.cu | ||||
|  magmablas/dgemv_conj.cu | ||||
|  magmablas/cgemv_conj.cu | ||||
| -magmablas/sgemv_fermi.cu | ||||
| -magmablas/dgemv_fermi.cu | ||||
| -magmablas/cgemv_fermi.cu | ||||
| +#magmablas/sgemv_fermi.cu | ||||
| +#magmablas/dgemv_fermi.cu | ||||
| +#magmablas/cgemv_fermi.cu | ||||
|  magmablas/sgerbt.cu | ||||
|  magmablas/dgerbt.cu | ||||
|  magmablas/cgerbt.cu | ||||
| @ -1,12 +0,0 @@ | ||||
| CUDA__VERSION=$(nvcc --version|sed -n 4p|cut -f5 -d" "|cut -f1 -d",") | ||||
| if [ "$CUDA__VERSION" != "$DESIRED_CUDA" ]; then | ||||
|     echo "CUDA Version is not $DESIRED_CUDA. CUDA Version found: $CUDA__VERSION" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| mkdir build | ||||
| cd build | ||||
| cmake .. -DUSE_FORTRAN=OFF -DGPU_TARGET="All" -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" -DCUDA_ARCH_LIST="$CUDA_ARCH_LIST" | ||||
| make -j$(getconf _NPROCESSORS_CONF) | ||||
| make install | ||||
| cd .. | ||||
| @ -1,388 +0,0 @@ | ||||
| diff --git a/CMakeLists.txt b/CMakeLists.txt | ||||
| index d5d8d87d..8a507334 100644 | ||||
| --- a/CMakeLists.txt | ||||
| +++ b/CMakeLists.txt | ||||
| @@ -3,7 +3,7 @@ cmake_minimum_required( VERSION 2.8.1 ) | ||||
|  # ---------------------------------------- | ||||
|  # to disable Fortran, set this to "off" | ||||
|  # see also -DADD_ below | ||||
| -option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON ) | ||||
| +option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" OFF ) | ||||
|  | ||||
|  if (USE_FORTRAN) | ||||
|      project( MAGMA C CXX Fortran ) | ||||
| @@ -75,6 +75,8 @@ else() | ||||
|      message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++11 flag. Some code may not compile.") | ||||
|  endif() | ||||
|  | ||||
| +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++ -fno-exceptions") | ||||
| + | ||||
|  CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99) | ||||
|  if (COMPILER_SUPPORTS_C99) | ||||
|      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99") | ||||
| @@ -101,15 +103,15 @@ endif() | ||||
|  | ||||
|  | ||||
|  # ---------------------------------------- | ||||
| -# locate OpenMP | ||||
| -find_package( OpenMP ) | ||||
| -if (OPENMP_FOUND) | ||||
| -    message( STATUS "Found OpenMP" ) | ||||
| -    message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" ) | ||||
| -    message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" ) | ||||
| -    set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) | ||||
| -    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) | ||||
| -endif() | ||||
| +# # locate OpenMP | ||||
| +# find_package( OpenMP ) | ||||
| +# if (OPENMP_FOUND) | ||||
| +#     message( STATUS "Found OpenMP" ) | ||||
| +#     message( STATUS "    OpenMP_C_FLAGS   ${OpenMP_C_FLAGS}" ) | ||||
| +#     message( STATUS "    OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" ) | ||||
| +#     set( CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" ) | ||||
| +#     set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" ) | ||||
| +# endif() | ||||
|  | ||||
|  if (MAGMA_ENABLE_CUDA) | ||||
|    # ---------------------------------------- | ||||
| @@ -132,7 +134,7 @@ if (MAGMA_ENABLE_CUDA) | ||||
|      set( NV_SM    "" ) | ||||
|      set( NV_COMP  "" ) | ||||
|  | ||||
| -    set(CUDA_SEPARABLE_COMPILATION ON) | ||||
| +    set(CUDA_SEPARABLE_COMPILATION OFF) | ||||
|  | ||||
|      # nvcc >= 6.5 supports -std=c++11, so propagate CXXFLAGS to NVCCFLAGS. | ||||
|      # Older nvcc didn't support -std=c++11, so previously we disabled propagation. | ||||
| @@ -294,11 +296,18 @@ if (MAGMA_ENABLE_CUDA) | ||||
|          message( STATUS "    compile for CUDA arch 8.0 (Ampere)" ) | ||||
|      endif() | ||||
|  | ||||
| +    if ( ${GPU_TARGET} MATCHES "All") | ||||
| +        set( MIN_ARCH 370) | ||||
| +        SET( NV_SM ${CUDA_ARCH_LIST}) | ||||
| +        SET( NV_COMP "") | ||||
| +    endif() | ||||
| + | ||||
|      if (NOT MIN_ARCH) | ||||
|          message( FATAL_ERROR "GPU_TARGET must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, or valid sm_[0-9][0-9]" ) | ||||
|      endif() | ||||
|  | ||||
| -    set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) | ||||
| +    set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC -std=c++11 ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) | ||||
| +    MESSAGE(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}") | ||||
|      #add_definitions( "-DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" ) | ||||
|      set(MAGMA_HAVE_CUDA "1") | ||||
|      set(MAGMA_CUDA_ARCH_MIN "${MIN_ARCH}") | ||||
| @@ -413,7 +422,7 @@ set_property(CACHE BLA_VENDOR PROPERTY STRINGS | ||||
|  set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" ) | ||||
|  if (LAPACK_LIBRARIES STREQUAL "") | ||||
|      message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." ) | ||||
| -    find_package( LAPACK ) | ||||
| +    # find_package( LAPACK ) | ||||
|      # force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui. | ||||
|      set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE ) | ||||
|  else() | ||||
| @@ -552,12 +561,12 @@ if (WIN32) | ||||
|      #message( "libmagma_all_f   ${libmagma_all_f}"   ) | ||||
|  | ||||
|      # on Windows, Fortran files aren't compiled if listed here... | ||||
| -    cuda_add_library( magma ${libmagma_all_cpp} ) | ||||
| +    cuda_add_library( magma STATIC ${libmagma_all_cpp} OPTIONS --compiler-options "-fPIC") | ||||
|      target_link_libraries( magma | ||||
|          ${LAPACK_LIBRARIES} | ||||
|          ${CUDA_CUDART_LIBRARY} | ||||
|          ${CUDA_CUBLAS_LIBRARIES} | ||||
| -        ${CUDA_cusparse_LIBRARY} | ||||
| +        # ${CUDA_cusparse_LIBRARY} | ||||
|      ) | ||||
|  | ||||
|      # no Fortran files at the moment (how to test libmagma_all_f is not empty?), | ||||
| @@ -575,13 +584,13 @@ if (WIN32) | ||||
|  else() | ||||
|      # Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files | ||||
|      if (MAGMA_ENABLE_CUDA) | ||||
| -      cuda_add_library( magma ${libmagma_all} ) | ||||
| +      cuda_add_library( magma STATIC ${libmagma_all} OPTIONS --compiler-options "-fPIC") | ||||
|        target_link_libraries( magma | ||||
|          ${blas_fix} | ||||
|          ${LAPACK_LIBRARIES} | ||||
|          ${CUDA_CUDART_LIBRARY} | ||||
|          ${CUDA_CUBLAS_LIBRARIES} | ||||
| -        ${CUDA_cusparse_LIBRARY} | ||||
| +        # ${CUDA_cusparse_LIBRARY} | ||||
|  	) | ||||
|      else() | ||||
|        find_package( hipBLAS ) | ||||
| @@ -614,138 +623,139 @@ else() | ||||
|      endif() | ||||
|  endif() | ||||
|  add_custom_target( lib DEPENDS magma ) | ||||
| - | ||||
| - | ||||
| -# ---------------------------------------- | ||||
| -# compile lapacktest library | ||||
| -# If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp | ||||
| -# else,           compile only C++     files, not Fortran files | ||||
| -if (USE_FORTRAN) | ||||
| -    foreach( filename ${liblapacktest_all} ) | ||||
| -        if (filename MATCHES "\\.(f|f90|F90)$") | ||||
| -            list( APPEND liblapacktest_all_f ${filename} ) | ||||
| -        endif() | ||||
| -    endforeach() | ||||
| -    add_library( lapacktest ${liblapacktest_all_f} ) | ||||
| -else() | ||||
| -    # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp | ||||
| -    foreach( filename ${liblapacktest_all} ) | ||||
| -        if (filename MATCHES "\\.(c|cu|cpp)$") | ||||
| -            list( APPEND liblapacktest_all_cpp ${filename} ) | ||||
| -        endif() | ||||
| -    endforeach() | ||||
| -    add_library( lapacktest ${liblapacktest_all_cpp} ) | ||||
| -endif() | ||||
| -target_link_libraries( lapacktest | ||||
| -    ${blas_fix} | ||||
| -    ${LAPACK_LIBRARIES} | ||||
| -) | ||||
| - | ||||
| - | ||||
| -# ---------------------------------------- | ||||
| -# compile tester library | ||||
| -add_library( tester ${libtest_all} ) | ||||
| -target_link_libraries( tester | ||||
| -    magma | ||||
| -    lapacktest | ||||
| -    ${blas_fix} | ||||
| -    ${LAPACK_LIBRARIES} | ||||
| -) | ||||
| +set_target_properties(magma PROPERTIES POSITION_INDEPENDENT_CODE ON) | ||||
| + | ||||
| + | ||||
| +# # ---------------------------------------- | ||||
| +# # compile lapacktest library | ||||
| +# # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp | ||||
| +# # else,           compile only C++     files, not Fortran files | ||||
| +# if (USE_FORTRAN) | ||||
| +#     foreach( filename ${liblapacktest_all} ) | ||||
| +#         if (filename MATCHES "\\.(f|f90|F90)$") | ||||
| +#             list( APPEND liblapacktest_all_f ${filename} ) | ||||
| +#         endif() | ||||
| +#     endforeach() | ||||
| +#     add_library( lapacktest ${liblapacktest_all_f} ) | ||||
| +# else() | ||||
| +#     # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp | ||||
| +#     foreach( filename ${liblapacktest_all} ) | ||||
| +#         if (filename MATCHES "\\.(c|cu|cpp)$") | ||||
| +#             list( APPEND liblapacktest_all_cpp ${filename} ) | ||||
| +#         endif() | ||||
| +#     endforeach() | ||||
| +#     add_library( lapacktest ${liblapacktest_all_cpp} ) | ||||
| +# endif() | ||||
| +# target_link_libraries( lapacktest | ||||
| +#     ${blas_fix} | ||||
| +#     ${LAPACK_LIBRARIES} | ||||
| +# ) | ||||
| + | ||||
| + | ||||
| +# # ---------------------------------------- | ||||
| +# # compile tester library | ||||
| +# add_library( tester ${libtest_all} ) | ||||
| +# target_link_libraries( tester | ||||
| +#     magma | ||||
| +#     lapacktest | ||||
| +#     ${blas_fix} | ||||
| +#     ${LAPACK_LIBRARIES} | ||||
| +# ) | ||||
|  | ||||
|  | ||||
|  # ---------------------------------------- | ||||
|  # compile MAGMA sparse library | ||||
|  | ||||
|  # sparse doesn't have Fortran at the moment, so no need for above shenanigans | ||||
| -if (MAGMA_ENABLE_CUDA) | ||||
| -  include_directories( sparse/include ) | ||||
| -  include_directories( sparse/control ) | ||||
| -else() | ||||
| -  include_directories( sparse_hip/include ) | ||||
| -  include_directories( sparse_hip/control ) | ||||
| -endif() | ||||
| -include_directories( testing ) | ||||
| - | ||||
| -if (MAGMA_ENABLE_CUDA) | ||||
| -  cuda_add_library( magma_sparse ${libsparse_all} ) | ||||
| -  target_link_libraries( magma_sparse | ||||
| -    magma | ||||
| -    ${blas_fix} | ||||
| -    ${LAPACK_LIBRARIES} | ||||
| -    ${CUDA_CUDART_LIBRARY} | ||||
| -    ${CUDA_CUBLAS_LIBRARIES} | ||||
| -    ${CUDA_cusparse_LIBRARY} | ||||
| -    ) | ||||
| -else() | ||||
| -  add_library( magma_sparse ${libsparse_all} ) | ||||
| -  target_link_libraries( magma_sparse | ||||
| -    magma | ||||
| -    ${blas_fix} | ||||
| -    ${LAPACK_LIBRARIES} | ||||
| -    hip::device | ||||
| -    roc::hipblas | ||||
| -    roc::hipsparse | ||||
| -    ) | ||||
| -endif() | ||||
| -add_custom_target( sparse-lib DEPENDS magma_sparse ) | ||||
| - | ||||
| - | ||||
| -# ---------------------------------------- | ||||
| -# compile each tester | ||||
| - | ||||
| -# save testers to testing/ | ||||
| -# save tester lib files to testing_lib/ to avoid cluttering lib/ | ||||
| -set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing ) | ||||
| -set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib ) | ||||
| -set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib ) | ||||
| - | ||||
| -# skip Fortran testers, which require an extra file from CUDA | ||||
| -foreach( filename ${testing_all} ) | ||||
| -    if (filename MATCHES "\\.(c|cu|cpp)$") | ||||
| -        list( APPEND testing_all_cpp ${filename} ) | ||||
| -    endif() | ||||
| -endforeach() | ||||
| -foreach( TEST ${testing_all_cpp} ) | ||||
| -    string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) | ||||
| -    string( REGEX REPLACE "testing/" "" EXE ${EXE} ) | ||||
| -    #message( "${TEST} --> ${EXE}" ) | ||||
| -    add_executable( ${EXE} ${TEST} ) | ||||
| -    target_link_libraries( ${EXE} tester lapacktest magma ) | ||||
| -    list( APPEND testing ${EXE} ) | ||||
| -endforeach() | ||||
| -add_custom_target( testing DEPENDS ${testing} ) | ||||
| - | ||||
| - | ||||
| -# ---------------------------------------- | ||||
| -# compile each sparse tester | ||||
| - | ||||
| -if (MAGMA_ENABLE_CUDA) | ||||
| -  set(SPARSE_TEST_DIR "sparse/testing") | ||||
| -else() | ||||
| -  set(SPARSE_TEST_DIR "sparse_hip/testing") | ||||
| -endif() | ||||
| - | ||||
| - | ||||
| -set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" ) | ||||
| -cmake_policy( SET CMP0037 OLD) | ||||
| -foreach( TEST ${sparse_testing_all} ) | ||||
| -    string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} ) | ||||
| -    string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} ) | ||||
| -    #message( "${TEST} --> ${EXE}" ) | ||||
| -    add_executable( ${EXE} ${TEST} ) | ||||
| -    target_link_libraries( ${EXE} magma_sparse magma ) | ||||
| -    list( APPEND sparse-testing ${EXE} ) | ||||
| -endforeach() | ||||
| -add_custom_target( sparse-testing DEPENDS ${sparse-testing} ) | ||||
| +# if (MAGMA_ENABLE_CUDA) | ||||
| +#   include_directories( sparse/include ) | ||||
| +#   include_directories( sparse/control ) | ||||
| +# else() | ||||
| +#   include_directories( sparse_hip/include ) | ||||
| +#   include_directories( sparse_hip/control ) | ||||
| +# endif() | ||||
| +# include_directories( testing ) | ||||
| + | ||||
| +# if (MAGMA_ENABLE_CUDA) | ||||
| +#   cuda_add_library( magma_sparse ${libsparse_all} ) | ||||
| +#   target_link_libraries( magma_sparse | ||||
| +#     magma | ||||
| +#     ${blas_fix} | ||||
| +#     ${LAPACK_LIBRARIES} | ||||
| +#     ${CUDA_CUDART_LIBRARY} | ||||
| +#     ${CUDA_CUBLAS_LIBRARIES} | ||||
| +#     ${CUDA_cusparse_LIBRARY} | ||||
| +#     ) | ||||
| +# else() | ||||
| +#   add_library( magma_sparse ${libsparse_all} ) | ||||
| +#   target_link_libraries( magma_sparse | ||||
| +#     magma | ||||
| +#     ${blas_fix} | ||||
| +#     ${LAPACK_LIBRARIES} | ||||
| +#     hip::device | ||||
| +#     roc::hipblas | ||||
| +#     roc::hipsparse | ||||
| +#     ) | ||||
| +# endif() | ||||
| +# add_custom_target( sparse-lib DEPENDS magma_sparse ) | ||||
| + | ||||
| + | ||||
| +# # ---------------------------------------- | ||||
| +# # compile each tester | ||||
| + | ||||
| +# # save testers to testing/ | ||||
| +# # save tester lib files to testing_lib/ to avoid cluttering lib/ | ||||
| +# set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing ) | ||||
| +# set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib ) | ||||
| +# set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib ) | ||||
| + | ||||
| +# # skip Fortran testers, which require an extra file from CUDA | ||||
| +# foreach( filename ${testing_all} ) | ||||
| +#     if (filename MATCHES "\\.(c|cu|cpp)$") | ||||
| +#         list( APPEND testing_all_cpp ${filename} ) | ||||
| +#     endif() | ||||
| +# endforeach() | ||||
| +# foreach( TEST ${testing_all_cpp} ) | ||||
| +#     string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} ) | ||||
| +#     string( REGEX REPLACE "testing/" "" EXE ${EXE} ) | ||||
| +#     #message( "${TEST} --> ${EXE}" ) | ||||
| +#     add_executable( ${EXE} ${TEST} ) | ||||
| +#     target_link_libraries( ${EXE} tester lapacktest magma ) | ||||
| +#     list( APPEND testing ${EXE} ) | ||||
| +# endforeach() | ||||
| +# add_custom_target( testing DEPENDS ${testing} ) | ||||
| + | ||||
| + | ||||
| +# # ---------------------------------------- | ||||
| +# # compile each sparse tester | ||||
| + | ||||
| +# if (MAGMA_ENABLE_CUDA) | ||||
| +#   set(SPARSE_TEST_DIR "sparse/testing") | ||||
| +# else() | ||||
| +#   set(SPARSE_TEST_DIR "sparse_hip/testing") | ||||
| +# endif() | ||||
| + | ||||
| + | ||||
| +# set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" ) | ||||
| +# cmake_policy( SET CMP0037 OLD) | ||||
| +# foreach( TEST ${sparse_testing_all} ) | ||||
| +#     string( REGEX REPLACE "\\.(cpp|f90|F90)"     "" EXE ${TEST} ) | ||||
| +#     string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} ) | ||||
| +#     #message( "${TEST} --> ${EXE}" ) | ||||
| +#     add_executable( ${EXE} ${TEST} ) | ||||
| +#     target_link_libraries( ${EXE} magma_sparse magma ) | ||||
| +#     list( APPEND sparse-testing ${EXE} ) | ||||
| +# endforeach() | ||||
| +# add_custom_target( sparse-testing DEPENDS ${sparse-testing} ) | ||||
|  | ||||
|  | ||||
|  # ---------------------------------------- | ||||
|  # what to install | ||||
| -install( TARGETS magma magma_sparse ${blas_fix} | ||||
| +install( TARGETS magma ${blas_fix} | ||||
|           RUNTIME DESTINATION bin | ||||
|           LIBRARY DESTINATION lib | ||||
|           ARCHIVE DESTINATION lib ) | ||||
| -file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) | ||||
| +file( GLOB headers include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) | ||||
|  if (USE_FORTRAN) | ||||
|      install( FILES ${headers} ${modules} | ||||
|               DESTINATION include ) | ||||
| @@ -769,9 +779,9 @@ else() | ||||
|      "${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" ) | ||||
|  endif() | ||||
|  set( MAGMA_REQUIRED "" ) | ||||
| -configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY ) | ||||
| -install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" | ||||
| -         DESTINATION lib/pkgconfig ) | ||||
| +# configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY ) | ||||
| +# install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}" | ||||
| +#          DESTINATION lib/pkgconfig ) | ||||
|  | ||||
|  # ---------------------------------------- | ||||
|  get_directory_property( compile_definitions COMPILE_DEFINITIONS ) | ||||
| @ -1,40 +0,0 @@ | ||||
| diff --git a/control/get_batched_crossover.cpp b/control/get_batched_crossover.cpp | ||||
| index 4ec57306..912f8608 100644 | ||||
| --- a/control/get_batched_crossover.cpp | ||||
| +++ b/control/get_batched_crossover.cpp | ||||
| @@ -119,7 +119,7 @@ void magma_get_spotrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_ | ||||
|  void magma_get_zgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb) | ||||
|  { | ||||
|      *nb    = 64; | ||||
| -    *recnb = 32; | ||||
| +    *recnb = 16; | ||||
|      return; | ||||
|  } | ||||
|   | ||||
| @@ -127,7 +127,7 @@ void magma_get_zgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_ | ||||
|  void magma_get_cgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb) | ||||
|  { | ||||
|      *nb    = 128; | ||||
| -    *recnb =  32; | ||||
| +    *recnb =  16; | ||||
|      return; | ||||
|  } | ||||
|   | ||||
| @@ -135,7 +135,7 @@ void magma_get_cgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_ | ||||
|  void magma_get_dgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb) | ||||
|  { | ||||
|      *nb    = 128; | ||||
| -    *recnb =  32; | ||||
| +    *recnb =  16; | ||||
|      return; | ||||
|  } | ||||
|   | ||||
| @@ -143,7 +143,7 @@ void magma_get_dgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_ | ||||
|  void magma_get_sgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb) | ||||
|  { | ||||
|      *nb    = 128; | ||||
| -    *recnb =  32; | ||||
| +    *recnb =  16; | ||||
|      return; | ||||
|  } | ||||
|   | ||||
| @ -1,15 +0,0 @@ | ||||
| diff --git a/src/zgetrf_batched.cpp b/src/zgetrf_batched.cpp | ||||
| index 24a65a90..884d9352 100644 | ||||
| --- a/src/zgetrf_batched.cpp | ||||
| +++ b/src/zgetrf_batched.cpp | ||||
| @@ -116,7 +116,9 @@ magma_zgetrf_batched( | ||||
|              return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue ); | ||||
|          } | ||||
|          else{ | ||||
| -            return magma_zgetrf_batched_smallsq_shfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue ); | ||||
| +            // magma_cgetrf_batched_smallsq_shfl is broken, therefore let's call noshfl version for arch < 700 | ||||
| +            // return magma_zgetrf_batched_smallsq_shfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue ); | ||||
| +            return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue ); | ||||
|          } | ||||
|          #else | ||||
|          return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue ); | ||||
| @ -1 +0,0 @@ | ||||
| 6cd83808c6e8bc7a44028e05112b3ab4e579bcc73202ed14733f66661127e213  magma-2.6.1.tar.gz | ||||
| @ -1,20 +0,0 @@ | ||||
| --- control/thread_queue.cpp	2016-08-30 06:37:49.000000000 -0700 | ||||
| +++ control/thread_queue.cpp	2016-10-10 19:47:28.911580965 -0700 | ||||
| @@ -15,7 +15,7 @@ | ||||
|  { | ||||
|      if ( err != 0 ) { | ||||
|          fprintf( stderr, "Error: %s (%d)\n", strerror(err), err ); | ||||
| -        throw std::exception(); | ||||
| +        // throw std::exception(); | ||||
|      } | ||||
|  } | ||||
|   | ||||
| @@ -172,7 +172,7 @@ | ||||
|      check( pthread_mutex_lock( &mutex )); | ||||
|      if ( quit_flag ) { | ||||
|          fprintf( stderr, "Error: push_task() called after quit()\n" ); | ||||
| -        throw std::exception(); | ||||
| +        // throw std::exception(); | ||||
|      } | ||||
|      q.push( task ); | ||||
|      ntask += 1; | ||||
| @ -1,21 +0,0 @@ | ||||
| The MIT License (MIT) | ||||
|  | ||||
| Copyright (c) 2016 manylinux | ||||
|  | ||||
| Permission is hereby granted, free of charge, to any person obtaining a copy | ||||
| of this software and associated documentation files (the "Software"), to deal | ||||
| in the Software without restriction, including without limitation the rights | ||||
| to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | ||||
| copies of the Software, and to permit persons to whom the Software is | ||||
| furnished to do so, subject to the following conditions: | ||||
|  | ||||
| The above copyright notice and this permission notice shall be included in all | ||||
| copies or substantial portions of the Software. | ||||
|  | ||||
| THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | ||||
| IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | ||||
| FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | ||||
| AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | ||||
| LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | ||||
| OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | ||||
| SOFTWARE. | ||||
| @ -1,28 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||||
|  | ||||
| case "${GPU_ARCH_TYPE:-BLANK}" in | ||||
|     BLANK) | ||||
|         # Legacy behavior for CircleCI | ||||
|         bash "${SCRIPTPATH}/build_cuda.sh" | ||||
|         ;; | ||||
|     cuda) | ||||
|         bash "${SCRIPTPATH}/build_cuda.sh" | ||||
|         ;; | ||||
|     rocm) | ||||
|         bash "${SCRIPTPATH}/build_rocm.sh" | ||||
|         ;; | ||||
|     cpu | cpu-cxx11-abi | cpu-s390x) | ||||
|         bash "${SCRIPTPATH}/build_cpu.sh" | ||||
|         ;; | ||||
|     xpu) | ||||
|         bash "${SCRIPTPATH}/build_xpu.sh" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..." | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
| @ -1,498 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
| # meant to be called only from the neighboring build.sh and build_cpu.sh scripts | ||||
|  | ||||
| set -ex | ||||
| SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" | ||||
|  | ||||
| source ${SOURCE_DIR}/set_desired_python.sh | ||||
|  | ||||
|  | ||||
| if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then | ||||
|     echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set" | ||||
|     echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # Retry a command that sometimes times out or fails intermittently (up to 5 attempts, with increasing sleeps in between) | ||||
| retry () { | ||||
|     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) | ||||
| } | ||||
|  | ||||
| # Default wheel platform tag; AlmaLinux images switch it to manylinux_2_28 below. | ||||
| PLATFORM="manylinux2014_x86_64" | ||||
| # TODO move this into the Docker images | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| # Install zip and openssl with whichever package manager the image provides. | ||||
| case "$OS_NAME" in | ||||
|     *"CentOS Linux"*) | ||||
|         retry yum install -q -y zip openssl | ||||
|         ;; | ||||
|     *"AlmaLinux"*) | ||||
|         retry yum install -q -y zip openssl | ||||
|         PLATFORM="manylinux_2_28_x86_64" | ||||
|         ;; | ||||
|     *"Red Hat Enterprise Linux"*) | ||||
|         retry dnf install -q -y zip openssl | ||||
|         ;; | ||||
|     *"Ubuntu"*) | ||||
|         # TODO: Remove this once nvidia package repos are back online | ||||
|         # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968 | ||||
|         # shellcheck disable=SC2046 | ||||
|         sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list") | ||||
|  | ||||
|         retry apt-get update | ||||
|         retry apt-get -y install zip openssl | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| # We use the package name to test the package by passing this to 'pip install' | ||||
| # This is the env variable that setup.py uses to name the package. Note that | ||||
| # pip 'normalizes' the name first by changing all - to _ | ||||
| if [[ -z "$TORCH_PACKAGE_NAME" ]]; then | ||||
|     TORCH_PACKAGE_NAME='torch' | ||||
| fi | ||||
|  | ||||
| if [[ -z "$TORCH_NO_PYTHON_PACKAGE_NAME" ]]; then | ||||
|     TORCH_NO_PYTHON_PACKAGE_NAME='torch_no_python' | ||||
| fi | ||||
|  | ||||
| TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')" | ||||
| TORCH_NO_PYTHON_PACKAGE_NAME="$(echo $TORCH_NO_PYTHON_PACKAGE_NAME | tr '-' '_')" | ||||
| echo "Expecting the built wheels to all be called '$TORCH_PACKAGE_NAME' or '$TORCH_NO_PYTHON_PACKAGE_NAME'" | ||||
|  | ||||
| # Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if | ||||
| # PYTORCH_BUILD_NUMBER > 1 | ||||
| build_version="$PYTORCH_BUILD_VERSION" | ||||
| build_number="$PYTORCH_BUILD_NUMBER" | ||||
| if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then | ||||
|     # This will be the *exact* version, since build_number<1 | ||||
|     build_version="$OVERRIDE_PACKAGE_VERSION" | ||||
|     build_number=0 | ||||
| fi | ||||
| if [[ -z "$build_version" ]]; then | ||||
|     build_version=1.0.0 | ||||
| fi | ||||
| if [[ -z "$build_number" ]]; then | ||||
|     build_number=1 | ||||
| fi | ||||
| export PYTORCH_BUILD_VERSION=$build_version | ||||
| export PYTORCH_BUILD_NUMBER=$build_number | ||||
|  | ||||
| export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH" | ||||
| export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH" | ||||
|  | ||||
| if [[ -e /opt/openssl ]]; then | ||||
|     export OPENSSL_ROOT_DIR=/opt/openssl | ||||
|     export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH | ||||
| fi | ||||
|  | ||||
|  | ||||
|  | ||||
| mkdir -p /tmp/$WHEELHOUSE_DIR | ||||
|  | ||||
| export PATCHELF_BIN=/usr/local/bin/patchelf | ||||
| patchelf_version=$($PATCHELF_BIN --version) | ||||
| echo "patchelf version: " $patchelf_version | ||||
| if [[ "$patchelf_version" == "patchelf 0.9" ]]; then | ||||
|     echo "Your patchelf version is too old. Please use version >= 0.10." | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| ######################################################## | ||||
| # Compile wheels as well as libtorch | ||||
| ####################################################### | ||||
| if [[ -z "$PYTORCH_ROOT" ]]; then | ||||
|     echo "Need to set PYTORCH_ROOT env variable" | ||||
|     exit 1 | ||||
| fi | ||||
| pushd "$PYTORCH_ROOT" | ||||
| python setup.py clean | ||||
| retry pip install -qr requirements.txt | ||||
| case ${DESIRED_PYTHON} in | ||||
|   cp31*) | ||||
|     retry pip install -q --pre numpy==2.1.0 | ||||
|     ;; | ||||
|   # Should catch 3.9+ | ||||
|   *) | ||||
|     retry pip install -q --pre numpy==2.0.2 | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     export _GLIBCXX_USE_CXX11_ABI=1 | ||||
| else | ||||
|     export _GLIBCXX_USE_CXX11_ABI=0 | ||||
| fi | ||||
|  | ||||
| if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|     echo "Calling build_amd.py at $(date)" | ||||
|     python tools/amd_build/build_amd.py | ||||
| fi | ||||
|  | ||||
| # This value comes from binary_linux_build.sh (and should only be set to true | ||||
| # for master / release branches) | ||||
| BUILD_DEBUG_INFO=${BUILD_DEBUG_INFO:=0} | ||||
|  | ||||
| if [[ $BUILD_DEBUG_INFO == "1" ]]; then | ||||
|     echo "Building wheel and debug info" | ||||
| else | ||||
|     echo "BUILD_DEBUG_INFO was not set, skipping debug info" | ||||
| fi | ||||
|  | ||||
| if [[ "$DISABLE_RCCL" = 1 ]]; then | ||||
|     echo "Disabling NCCL/RCCL in pyTorch" | ||||
|     USE_RCCL=0 | ||||
|     USE_NCCL=0 | ||||
|     USE_KINETO=0 | ||||
| else | ||||
|     USE_RCCL=1 | ||||
|     USE_NCCL=1 | ||||
|     USE_KINETO=1 | ||||
| fi | ||||
|  | ||||
| echo "Calling setup.py bdist at $(date)" | ||||
|  | ||||
| if [[ "$USE_SPLIT_BUILD" == "true" ]]; then | ||||
|     echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)" | ||||
|     time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ | ||||
|     BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \ | ||||
|     BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ | ||||
|     USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ | ||||
|     python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR | ||||
|     echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)" | ||||
|     echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)" | ||||
|     time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ | ||||
|     BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \ | ||||
|     BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ | ||||
|     USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ | ||||
|     python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR --cmake | ||||
|     echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)" | ||||
| else | ||||
|     time CMAKE_ARGS=${CMAKE_ARGS[@]} \ | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \ | ||||
|         BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \ | ||||
|         USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \ | ||||
|         python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR | ||||
| fi | ||||
| echo "Finished setup.py bdist at $(date)" | ||||
|  | ||||
| # Build libtorch packages | ||||
| if [[ -n "$BUILD_PYTHONLESS" ]]; then | ||||
|     # Now build pythonless libtorch | ||||
|     # Note - just use whichever python we happen to be on | ||||
|     python setup.py clean | ||||
|  | ||||
|     if [[ $LIBTORCH_VARIANT = *"static"* ]]; then | ||||
|         STATIC_CMAKE_FLAG="-DTORCH_STATIC=1" | ||||
|     fi | ||||
|  | ||||
|     mkdir -p build | ||||
|     pushd build | ||||
|     echo "Calling tools/build_libtorch.py at $(date)" | ||||
|     time CMAKE_ARGS=${CMAKE_ARGS[@]} \ | ||||
|          EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ | ||||
|          python ../tools/build_libtorch.py | ||||
|     echo "Finished tools/build_libtorch.py at $(date)" | ||||
|     popd | ||||
|  | ||||
|     mkdir -p libtorch/{lib,bin,include,share} | ||||
|     cp -r build/build/lib libtorch/ | ||||
|  | ||||
|     # for now, the headers for the libtorch package will just be copied in | ||||
|     # from one of the wheels (this is from when this script built multiple | ||||
|     # wheels at once) | ||||
|     ANY_WHEEL=$(ls /tmp/$WHEELHOUSE_DIR/torch*.whl | head -n1) | ||||
|     unzip -d any_wheel $ANY_WHEEL | ||||
|     if [[ -d any_wheel/torch/include ]]; then | ||||
|         cp -r any_wheel/torch/include libtorch/ | ||||
|     else | ||||
|         cp -r any_wheel/torch/lib/include libtorch/ | ||||
|     fi | ||||
|     cp -r any_wheel/torch/share/cmake libtorch/share/ | ||||
|     rm -rf any_wheel | ||||
|  | ||||
|     # Stamp the libtorch tree with the version and the commit it was built from. | ||||
|     echo $PYTORCH_BUILD_VERSION > libtorch/build-version | ||||
|     # Use cd, not pushd, inside the command substitution: pushd prints the | ||||
|     # directory stack on stdout, which would be captured into build-hash ahead | ||||
|     # of the commit hash. | ||||
|     echo "$(cd "$PYTORCH_ROOT" >/dev/null && git rev-parse HEAD)" > libtorch/build-hash | ||||
|  | ||||
|     mkdir -p /tmp/$LIBTORCH_HOUSE_DIR | ||||
|  | ||||
|     if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|         LIBTORCH_ABI="cxx11-abi-" | ||||
|     else | ||||
|         LIBTORCH_ABI= | ||||
|     fi | ||||
|  | ||||
|     zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch | ||||
|     cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \ | ||||
|        /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip | ||||
| fi | ||||
|  | ||||
| popd | ||||
|  | ||||
| ####################################################################### | ||||
| # ADD DEPENDENCIES INTO THE WHEEL | ||||
| # | ||||
| # auditwheel repair doesn't work correctly and is buggy | ||||
| # so manually do the work of copying dependency libs and patchelfing | ||||
| # and fixing RECORDS entries correctly | ||||
| ###################################################################### | ||||
|  | ||||
| # Print the path a bundled dependency should be renamed to: same directory, | ||||
| # with the first 8 hex digits of the file's SHA-256 inserted before the first | ||||
| # "." of the basename (e.g. lib/libfoo.so.1 -> lib/libfoo-<hash>.so.1). | ||||
| #   $1 - path to an existing library file (it is read to compute the hash) | ||||
| # Libraries whose names must stay stable are printed unchanged (see below). | ||||
| # Expansions are quoted so a path containing spaces or glob characters is | ||||
| # hashed and printed intact instead of being word-split. | ||||
| fname_with_sha256() { | ||||
|     HASH=$(sha256sum "$1" | cut -c1-8) | ||||
|     DIRNAME=$(dirname "$1") | ||||
|     BASENAME=$(basename "$1") | ||||
|     # Do not rename nvrtc-builtins.so as they are dynamically loaded | ||||
|     # by libnvrtc.so | ||||
|     # Similarly don't mangle libcudnn and libcublas library names | ||||
|     if [[ $BASENAME == "libnvrtc-builtins.s"* || $BASENAME == "libcudnn"* || $BASENAME == "libcublas"*  ]]; then | ||||
|         echo "$1" | ||||
|     else | ||||
|         # Split the basename at its first "." so the hash goes before the extension | ||||
|         INITNAME=$(echo "$BASENAME" | cut -f1 -d".") | ||||
|         ENDNAME=$(echo "$BASENAME" | cut -f 2- -d".") | ||||
|         echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Print $1 with everything that follows the first ".so" removed, | ||||
| # e.g. lib/libfoo.so.4 -> lib/libfoo.so | ||||
| fname_without_so_number() { | ||||
|     echo $1 | sed -e 's/\.so.*/.so/g' | ||||
| } | ||||
|  | ||||
| # Print one line for a wheel's RECORD file describing the file at $1, in the | ||||
| # form "<path>",sha256=<digest>,<size>. | ||||
| #   $1 - path of the file, written into the record exactly as given | ||||
| # Assigns the (non-local) variables FPATH, and HASH / FSIZE for hashed entries. | ||||
| make_wheel_record() { | ||||
|     FPATH=$1 | ||||
|     if echo $FPATH | grep RECORD >/dev/null 2>&1; then | ||||
|         # Any path containing "RECORD" is listed with empty hash and size fields | ||||
|         echo "\"$FPATH\",," | ||||
|     else | ||||
|         # Digest: binary SHA-256, base64-encoded, then "+" -> "-", "/" -> "_" and "=" padding removed | ||||
|         HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g') | ||||
|         # Size: the fifth column of `ls -nl` | ||||
|         FSIZE=$(ls -nl $FPATH | awk '{print $5}') | ||||
|         echo "\"$FPATH\",sha256=$HASH,$FSIZE" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Rewrite needed-library entries in every shared object found under a directory. | ||||
| #   $1 - directory searched (recursively, via find) for files named *.so* | ||||
| #   $2 - library name to look for among each file's needed entries | ||||
| #   $3 - name to put in its place | ||||
| # Nothing is done when $2 and $3 are equal, unless DESIRED_CUDA contains "rocm". | ||||
| replace_needed_sofiles() { | ||||
|     find $1 -name '*.so*' | while read sofile; do | ||||
|         origname=$2 | ||||
|         patchedname=$3 | ||||
|         if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|             # grep exits non-zero when the file does not need this library; | ||||
|             # suspend errexit so that "no match" does not abort the script. | ||||
|             set +e | ||||
|             # origname becomes the needed entry as recorded in the file (the name plus any suffix). | ||||
|             # NOTE(review): $origname is used as a regex (its dots match any character) and | ||||
|             # grep may return several lines if more than one entry matches - confirm this is acceptable. | ||||
|             origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*") | ||||
|             ERRCODE=$? | ||||
|             set -e | ||||
|             # Only patch files that actually reference the library | ||||
|             if [ "$ERRCODE" -eq "0" ]; then | ||||
|                 echo "patching $sofile entry $origname to $patchedname" | ||||
|                 $PATCHELF_BIN --replace-needed $origname $patchedname $sofile | ||||
|             fi | ||||
|         fi | ||||
|     done | ||||
| } | ||||
|  | ||||
| echo 'Built this wheel:' | ||||
| ls /tmp/$WHEELHOUSE_DIR | ||||
| mkdir -p "/$WHEELHOUSE_DIR" | ||||
| mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/ | ||||
|  | ||||
| if [[ "$USE_SPLIT_BUILD" == "true" ]]; then | ||||
|     mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true | ||||
| fi | ||||
|  | ||||
| if [[ -n "$BUILD_PYTHONLESS" ]]; then | ||||
|     mkdir -p /$LIBTORCH_HOUSE_DIR | ||||
|     mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR | ||||
|     rm -rf /tmp/$LIBTORCH_HOUSE_DIR | ||||
| fi | ||||
| rm -rf /tmp/$WHEELHOUSE_DIR | ||||
| rm -rf /tmp_dir | ||||
| mkdir /tmp_dir | ||||
| pushd /tmp_dir | ||||
|  | ||||
| for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do | ||||
|  | ||||
|     # if the glob didn't match anything | ||||
|     if [[ ! -e $pkg ]]; then | ||||
|         continue | ||||
|     fi | ||||
|  | ||||
|     rm -rf tmp | ||||
|     mkdir -p tmp | ||||
|     cd tmp | ||||
|     cp $pkg . | ||||
|  | ||||
|     unzip -q $(basename $pkg) | ||||
|     rm -f $(basename $pkg) | ||||
|  | ||||
|     if [[ -d torch ]]; then | ||||
|         PREFIX=torch | ||||
|     else | ||||
|         PREFIX=libtorch | ||||
|     fi | ||||
|  | ||||
|     if [[ $pkg != *"without-deps"* ]]; then | ||||
|         # copy over needed dependent .so files over and tag them with their hash | ||||
|         patched=() | ||||
|         for filepath in "${DEPS_LIST[@]}"; do | ||||
|             filename=$(basename $filepath) | ||||
|             destpath=$PREFIX/lib/$filename | ||||
|             if [[ "$filepath" != "$destpath" ]]; then | ||||
|                 cp $filepath $destpath | ||||
|             fi | ||||
|  | ||||
|             # ROCm workaround for roctracer dlopens | ||||
|             if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|                 patchedpath=$(fname_without_so_number $destpath) | ||||
|             # Keep the so number for XPU dependencies | ||||
|             elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then | ||||
|                 patchedpath=$destpath | ||||
|             else | ||||
|                 patchedpath=$(fname_with_sha256 $destpath) | ||||
|             fi | ||||
|             patchedname=$(basename $patchedpath) | ||||
|             if [[ "$destpath" != "$patchedpath" ]]; then | ||||
|                 mv $destpath $patchedpath | ||||
|             fi | ||||
|             patched+=("$patchedname") | ||||
|             echo "Copied $filepath to $patchedpath" | ||||
|         done | ||||
|  | ||||
|         echo "patching to fix the so names to the hashed names" | ||||
|         for ((i=0;i<${#DEPS_LIST[@]};++i)); do | ||||
|             replace_needed_sofiles $PREFIX ${DEPS_SONAME[i]} ${patched[i]} | ||||
|             # do the same for caffe2, if it exists | ||||
|             if [[ -d caffe2 ]]; then | ||||
|                 replace_needed_sofiles caffe2 ${DEPS_SONAME[i]} ${patched[i]} | ||||
|             fi | ||||
|         done | ||||
|  | ||||
|         # copy over needed auxiliary files | ||||
|         for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do | ||||
|             srcpath=${DEPS_AUX_SRCLIST[i]} | ||||
|             dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]} | ||||
|             mkdir -p $(dirname $dstpath) | ||||
|             cp $srcpath $dstpath | ||||
|         done | ||||
|     fi | ||||
|  | ||||
|     # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib | ||||
|     find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do | ||||
|         echo "Setting rpath of $sofile to ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'}" | ||||
|         $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile | ||||
|         $PATCHELF_BIN --print-rpath $sofile | ||||
|     done | ||||
|  | ||||
|     # set RPATH of lib/ files to $ORIGIN | ||||
|     find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do | ||||
|         echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}" | ||||
|         $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile | ||||
|         $PATCHELF_BIN --print-rpath $sofile | ||||
|     done | ||||
|  | ||||
|     # Create the Manylinux 2_28 tag; this needs to happen before the RECORD file is regenerated | ||||
|     if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then | ||||
|         wheel_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/WHEEL/g') | ||||
|         sed -i -e s#linux_x86_64#"${PLATFORM}"# $wheel_file; | ||||
|     fi | ||||
|  | ||||
|     # regenerate the RECORD file with new hashes | ||||
|     record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g') | ||||
|     if [[ -e $record_file ]]; then | ||||
|         echo "Generating new record file $record_file" | ||||
|         : > "$record_file" | ||||
|         # generate records for folders in wheel | ||||
|         find * -type f | while read fname; do | ||||
|             make_wheel_record "$fname" >>"$record_file" | ||||
|         done | ||||
|     fi | ||||
|  | ||||
|     if [[ $BUILD_DEBUG_INFO == "1" ]]; then | ||||
|         pushd "$PREFIX/lib" | ||||
|  | ||||
|         # Duplicate library into debug lib | ||||
|         cp libtorch_cpu.so libtorch_cpu.so.dbg | ||||
|  | ||||
|         # Keep debug symbols on debug lib | ||||
|         strip --only-keep-debug libtorch_cpu.so.dbg | ||||
|  | ||||
|         # Remove debug info from release lib | ||||
|         strip --strip-debug libtorch_cpu.so | ||||
|  | ||||
|         objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg | ||||
|  | ||||
|         # Zip up debug info | ||||
|         mkdir -p /tmp/debug | ||||
|         mv libtorch_cpu.so.dbg /tmp/debug/libtorch_cpu.so.dbg | ||||
|         CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch_cpu.so) | ||||
|  | ||||
|         pushd /tmp | ||||
|         PKG_NAME=$(basename "$pkg" | sed 's/\.whl$//g') | ||||
|         zip /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip /tmp/debug/libtorch_cpu.so.dbg | ||||
|         cp /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip "$PYTORCH_FINAL_PACKAGE_DIR" | ||||
|         popd | ||||
|  | ||||
|         popd | ||||
|     fi | ||||
|  | ||||
|     # Re-zip the patched tree from the current directory (the unpacked package). | ||||
|     # NOTE: the zip commands used to pass "$PREIX*", a misspelling of PREFIX that | ||||
|     # is never assigned in this script, so it expanded to a bare "*" and archived | ||||
|     # every top-level entry. That result is kept and spelled out explicitly; | ||||
|     # narrowing it to "$PREFIX"* would change what ends up in the archive. | ||||
|     # Rename wheel for Manylinux 2_28 | ||||
|     if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then | ||||
|         pkg_name=$(echo $(basename $pkg) | sed -e s#linux_x86_64#"${PLATFORM}"#) | ||||
|         zip -rq $pkg_name * | ||||
|         # replace the original wheel with the renamed one, in the same directory | ||||
|         rm -f $pkg | ||||
|         mv $pkg_name $(dirname $pkg)/$pkg_name | ||||
|     else | ||||
|         # zip up the wheel back | ||||
|         zip -rq $(basename $pkg) * | ||||
|         # remove original wheel | ||||
|         rm -f $pkg | ||||
|         mv $(basename $pkg) $pkg | ||||
|     fi | ||||
|  | ||||
|     cd .. | ||||
|     rm -rf tmp | ||||
| done | ||||
|  | ||||
| # Copy wheels to host machine for persistence before testing | ||||
| if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|     if [[ -n "$BUILD_PYTHONLESS" ]]; then | ||||
|         cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR" | ||||
|     else | ||||
|         cp /$WHEELHOUSE_DIR/torch*.whl "$PYTORCH_FINAL_PACKAGE_DIR" | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| # remove stuff before testing | ||||
| rm -rf /opt/rh | ||||
| if ls /usr/local/cuda* >/dev/null 2>&1; then | ||||
|     rm -rf /usr/local/cuda* | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Test that all the wheels work | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|   export OMP_NUM_THREADS=4 # on NUMA machines this takes too long | ||||
|   pushd $PYTORCH_ROOT/test | ||||
|  | ||||
|   # Install the wheel for this Python version | ||||
|   if [[ "$USE_SPLIT_BUILD" == "true" ]]; then | ||||
|     pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true | ||||
|   fi | ||||
|  | ||||
|   pip uninstall -y "$TORCH_PACKAGE_NAME" | ||||
|  | ||||
|   if [[ "$USE_SPLIT_BUILD" == "true" ]]; then | ||||
|     pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v | ||||
|   fi | ||||
|  | ||||
|   pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v | ||||
|  | ||||
|   # Print info on the libraries installed in this wheel | ||||
|   # Rather than adjust find command to skip non-library files with an embedded *.so* in their name, | ||||
|   # since this is only for reporting purposes, we add the || true to the ldd command. | ||||
|   installed_libraries=($(find "$pydir/lib/python${py_majmin}/site-packages/torch/" -name '*.so*')) | ||||
|   echo "The wheel installed all of the libraries: ${installed_libraries[@]}" | ||||
|   for installed_lib in "${installed_libraries[@]}"; do | ||||
|       ldd "$installed_lib" || true | ||||
|   done | ||||
|  | ||||
|   # Run the tests | ||||
|   echo "$(date) :: Running tests" | ||||
|   pushd "$PYTORCH_ROOT" | ||||
|  | ||||
|  | ||||
|   LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \ | ||||
|           "${PYTORCH_ROOT}/.ci/pytorch/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA" | ||||
|   popd | ||||
|   echo "$(date) :: Finished tests" | ||||
| fi | ||||
| @ -1,60 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| export TH_BINARY_BUILD=1 | ||||
| export USE_CUDA=0 | ||||
|  | ||||
| # Keep an array of cmake variables to add to | ||||
| if [[ -z "$CMAKE_ARGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build() | ||||
|     CMAKE_ARGS=() | ||||
| fi | ||||
| if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build_caffe2() | ||||
|     EXTRA_CAFFE2_CMAKE_FLAGS=() | ||||
| fi | ||||
|  | ||||
| WHEELHOUSE_DIR="wheelhousecpu" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_housecpu" | ||||
| if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousecpu" | ||||
|     else | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housecpu" | ||||
|     fi | ||||
| fi | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|  | ||||
| # Locate the system libgomp that gets bundled into the package (see DEPS_LIST). | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| case "$OS_NAME" in | ||||
|     *"CentOS Linux"* | *"Red Hat Enterprise Linux"* | *"AlmaLinux"*) | ||||
|         LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|         ;; | ||||
|     *"Ubuntu"*) | ||||
|         # Ubuntu keeps the library in an architecture-specific directory | ||||
|         case "$(uname -m)" in | ||||
|             s390x) | ||||
|                 LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1" | ||||
|                 ;; | ||||
|             *) | ||||
|                 LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" | ||||
|                 ;; | ||||
|         esac | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| DEPS_LIST=( | ||||
|     "$LIBGOMP_PATH" | ||||
| ) | ||||
|  | ||||
| DEPS_SONAME=( | ||||
|     "libgomp.so.1" | ||||
| ) | ||||
|  | ||||
| rm -rf /usr/local/cuda* | ||||
|  | ||||
| SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|     BUILD_SCRIPT=build_common.sh | ||||
| else | ||||
|     BUILD_SCRIPT=build_libtorch.sh | ||||
| fi | ||||
| source ${SOURCE_DIR}/${BUILD_SCRIPT} | ||||
| @ -1,299 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Physical (symlink-resolved) directory containing this script. | ||||
| # The closing of the command substitution previously had a stray ")", which | ||||
| # left a literal ")" at the end of the stored path. | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
|  | ||||
| export TORCH_NVCC_FLAGS="-Xfatbin -compress-all" | ||||
| export NCCL_ROOT_DIR=/usr/local/cuda | ||||
| export TH_BINARY_BUILD=1 | ||||
| export USE_STATIC_CUDNN=1 | ||||
| export USE_STATIC_NCCL=1 | ||||
| export ATEN_STATIC_CUDA=1 | ||||
| export USE_CUDA_STATIC_LINK=1 | ||||
| export INSTALL_TEST=0 # dont install test binaries into site-packages | ||||
| export USE_CUPTI_SO=0 | ||||
| export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build | ||||
|  | ||||
| # Keep an array of cmake variables to add to | ||||
| if [[ -z "$CMAKE_ARGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build() | ||||
|     CMAKE_ARGS=() | ||||
| fi | ||||
| if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build_caffe2() | ||||
|     EXTRA_CAFFE2_CMAKE_FLAGS=() | ||||
| fi | ||||
|  | ||||
| # Determine CUDA version and architectures to build for | ||||
| # | ||||
| # NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`, | ||||
| # because in some cases a single Docker image can have multiple CUDA versions | ||||
| # on it, and `nvcc --version` might not show the CUDA version we want. | ||||
| if [[ -n "$DESIRED_CUDA" ]]; then | ||||
|     # If the DESIRED_CUDA already matches the format that we expect (<digits>.<digits>) | ||||
|     if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then | ||||
|         CUDA_VERSION=${DESIRED_CUDA} | ||||
|     else | ||||
|         # Short tags such as cu90, cu92, cu100, cu101: skip the two-character prefix | ||||
|         # and put a dot before the last digit. A value of any other length leaves | ||||
|         # CUDA_VERSION unchanged. | ||||
|         if [[ ${#DESIRED_CUDA} -eq 4 ]]; then | ||||
|             # four characters, e.g. cu92 -> 9.2 | ||||
|             CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}" | ||||
|         elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then | ||||
|             # five characters, e.g. cu118 -> 11.8 | ||||
|             CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}" | ||||
|         fi | ||||
|     fi | ||||
|     echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA" | ||||
|  | ||||
|     # There really has to be a better way to do this - eli | ||||
|     # Possibly limiting builds to specific cuda versions by delimiting images would be a choice | ||||
|     # NOTE(review): OS_NAME is only assigned further down in this script, so unless | ||||
|     # the caller exports it this test sees an empty value and the branch is skipped - confirm intent. | ||||
|     if [[ "$OS_NAME" == *"Ubuntu"* ]]; then | ||||
|         echo "Switching to CUDA version ${DESIRED_CUDA}" | ||||
|         /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}" | ||||
|     fi | ||||
| else | ||||
|     # No DESIRED_CUDA: take the fifth space-separated field of nvcc's "release" line, | ||||
|     # cut at the first comma | ||||
|     CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",") | ||||
|     echo "CUDA $CUDA_VERSION Detected" | ||||
| fi | ||||
|  | ||||
| cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.') | ||||
|  | ||||
| TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6" | ||||
| case ${CUDA_VERSION} in | ||||
|     12.6) | ||||
|         if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then | ||||
|             TORCH_CUDA_ARCH_LIST="9.0" | ||||
|         else | ||||
|             TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX" | ||||
|         fi | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.4) | ||||
|         if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then | ||||
|             TORCH_CUDA_ARCH_LIST="9.0" | ||||
|         else | ||||
|             TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         fi | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.1) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     11.8) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     *) | ||||
|         echo "unknown cuda version $CUDA_VERSION" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST} | ||||
| echo "${TORCH_CUDA_ARCH_LIST}" | ||||
|  | ||||
| # Package directories | ||||
| WHEELHOUSE_DIR="wheelhouse$cuda_version_nodot" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_house$cuda_version_nodot" | ||||
| if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$cuda_version_nodot" | ||||
|     else | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$cuda_version_nodot" | ||||
|     fi | ||||
| fi | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|  | ||||
| # Locate the system libgomp that gets bundled into the package (see DEPS_LIST). | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| case "$OS_NAME" in | ||||
|     *"CentOS Linux"* | *"AlmaLinux"* | *"Red Hat Enterprise Linux"*) | ||||
|         LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|         ;; | ||||
|     *"Ubuntu"*) | ||||
|         LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| DEPS_LIST=( | ||||
|     "$LIBGOMP_PATH" | ||||
| ) | ||||
| DEPS_SONAME=( | ||||
|     "libgomp.so.1" | ||||
| ) | ||||
|  | ||||
| # CUDA 11.8 builds have to ship libcusparseLt.so.0 with the binary, | ||||
| # since nvidia-cusparselt-cu11 is not available on PyPI | ||||
| if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then | ||||
|         DEPS_SONAME+=( | ||||
|             "libcusparseLt.so.0" | ||||
|         ) | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0" | ||||
|         ) | ||||
| fi | ||||
|  | ||||
| if [[ $CUDA_VERSION == "12.4" || $CUDA_VERSION == "12.6" ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Try parallelizing nvcc as well | ||||
|     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
|  | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling with cudnn and cublas." | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcudnn_adv.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_cnn.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_graph.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_ops.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_heuristic.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn.so.9" | ||||
|             "/usr/local/cuda/lib64/libcublas.so.12" | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.12" | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0" | ||||
|             "/usr/local/cuda/lib64/libcudart.so.12" | ||||
|             "/usr/local/cuda/lib64/libnvToolsExt.so.1" | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.12" | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so" | ||||
|         ) | ||||
|         DEPS_SONAME+=( | ||||
|             "libcudnn_adv.so.9" | ||||
|             "libcudnn_cnn.so.9" | ||||
|             "libcudnn_graph.so.9" | ||||
|             "libcudnn_ops.so.9" | ||||
|             "libcudnn_engines_runtime_compiled.so.9" | ||||
|             "libcudnn_engines_precompiled.so.9" | ||||
|             "libcudnn_heuristic.so.9" | ||||
|             "libcudnn.so.9" | ||||
|             "libcublas.so.12" | ||||
|             "libcublasLt.so.12" | ||||
|             "libcusparseLt.so.0" | ||||
|             "libcudart.so.12" | ||||
|             "libnvToolsExt.so.1" | ||||
|             "libnvrtc.so.12" | ||||
|             "libnvrtc-builtins.so" | ||||
|         ) | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|         CUDA_RPATHS=( | ||||
|             '$ORIGIN/../../nvidia/cublas/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_cupti/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_nvrtc/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_runtime/lib' | ||||
|             '$ORIGIN/../../nvidia/cudnn/lib' | ||||
|             '$ORIGIN/../../nvidia/cufft/lib' | ||||
|             '$ORIGIN/../../nvidia/curand/lib' | ||||
|             '$ORIGIN/../../nvidia/cusolver/lib' | ||||
|             '$ORIGIN/../../nvidia/cusparse/lib' | ||||
|             '$ORIGIN/../../cusparselt/lib' | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|         ) | ||||
|         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") | ||||
|         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' | ||||
|         export FORCE_RPATH="--force-rpath" | ||||
|         export USE_STATIC_NCCL=0 | ||||
|         export USE_SYSTEM_NCCL=1 | ||||
|         export ATEN_STATIC_CUDA=0 | ||||
|         export USE_CUDA_STATIC_LINK=0 | ||||
|         export USE_CUPTI_SO=1 | ||||
|         export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" | ||||
|         export NCCL_LIB_DIR="/usr/local/cuda/lib64/" | ||||
|     fi | ||||
| elif [[ $CUDA_VERSION == "11.8" ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Try parallelizing nvcc as well | ||||
|     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
|     # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750 | ||||
|     export BUILD_BUNDLE_PTXAS=1 | ||||
|  | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling with cudnn and cublas." | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcudnn_adv.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_cnn.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_graph.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_ops.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn_heuristic.so.9" | ||||
|             "/usr/local/cuda/lib64/libcudnn.so.9" | ||||
|             "/usr/local/cuda/lib64/libcublas.so.11" | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.11" | ||||
|             "/usr/local/cuda/lib64/libcudart.so.11.0" | ||||
|             "/usr/local/cuda/lib64/libnvToolsExt.so.1" | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.11.2"    # this is not a mistake, it links to more specific cuda version | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8" | ||||
|         ) | ||||
|         DEPS_SONAME+=( | ||||
|             "libcudnn_adv.so.9" | ||||
|             "libcudnn_cnn.so.9" | ||||
|             "libcudnn_graph.so.9" | ||||
|             "libcudnn_ops.so.9" | ||||
|             "libcudnn_engines_runtime_compiled.so.9" | ||||
|             "libcudnn_engines_precompiled.so.9" | ||||
|             "libcudnn_heuristic.so.9" | ||||
|             "libcudnn.so.9" | ||||
|             "libcublas.so.11" | ||||
|             "libcublasLt.so.11" | ||||
|             "libcudart.so.11.0" | ||||
|             "libnvToolsExt.so.1" | ||||
|             "libnvrtc.so.11.2" | ||||
|             "libnvrtc-builtins.so.11.8" | ||||
|         ) | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|         CUDA_RPATHS=( | ||||
|             '$ORIGIN/../../nvidia/cublas/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_cupti/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_nvrtc/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_runtime/lib' | ||||
|             '$ORIGIN/../../nvidia/cudnn/lib' | ||||
|             '$ORIGIN/../../nvidia/cufft/lib' | ||||
|             '$ORIGIN/../../nvidia/curand/lib' | ||||
|             '$ORIGIN/../../nvidia/cusolver/lib' | ||||
|             '$ORIGIN/../../nvidia/cusparse/lib' | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|         ) | ||||
|         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") | ||||
|         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' | ||||
|         export FORCE_RPATH="--force-rpath" | ||||
|         export USE_STATIC_NCCL=0 | ||||
|         export USE_SYSTEM_NCCL=1 | ||||
|         export ATEN_STATIC_CUDA=0 | ||||
|         export USE_CUDA_STATIC_LINK=0 | ||||
|         export USE_CUPTI_SO=1 | ||||
|         export NCCL_INCLUDE_DIR="/usr/local/cuda/include/" | ||||
|         export NCCL_LIB_DIR="/usr/local/cuda/lib64/" | ||||
|     fi | ||||
| else | ||||
|     echo "Unknown cuda version $CUDA_VERSION" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # builder/test.sh requires DESIRED_CUDA to know what tests to exclude | ||||
| export DESIRED_CUDA="$cuda_version_nodot" | ||||
|  | ||||
| # Switch `/usr/local/cuda` to the desired CUDA version | ||||
| rm -rf /usr/local/cuda || true | ||||
| ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda | ||||
|  | ||||
| # Switch `/usr/local/magma` to the desired CUDA version | ||||
| rm -rf /usr/local/magma || true | ||||
| ln -s /usr/local/cuda-${CUDA_VERSION}/magma /usr/local/magma | ||||
|  | ||||
| export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130 | ||||
| export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0 | ||||
| export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) | ||||
|  | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|     BUILD_SCRIPT=build_common.sh | ||||
| else | ||||
|     BUILD_SCRIPT=build_libtorch.sh | ||||
| fi | ||||
| source $SCRIPTPATH/${BUILD_SCRIPT} | ||||
| @ -1,353 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
| # meant to be called only from the neighboring build.sh and build_cpu.sh scripts | ||||
|  | ||||
| # Abort on the first failing command and make a pipeline fail when any of its | ||||
| # stages fails. pipefail has to be passed through -o: the previous | ||||
| # `set -e pipefail` only enabled -e and set $1 to the word "pipefail". | ||||
| set -eo pipefail | ||||
| # Directory that holds this script (resolved through BASH_SOURCE, so it also | ||||
| # works when the script is sourced) | ||||
| SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" | ||||
|  | ||||
| # Require only one python installation | ||||
| if [[ -z "$DESIRED_PYTHON" ]]; then | ||||
|     echo "Need to set DESIRED_PYTHON env variable" | ||||
|     exit 1 | ||||
| fi | ||||
| # A python-less (libtorch) build must also say which libtorch flavor to produce | ||||
| if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then | ||||
|     echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set" | ||||
|     echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # Function to retry functions that sometimes timeout or have flaky failures | ||||
| # Usage: retry <command> [args...] | ||||
| # Runs the command up to five times, sleeping 1, 2, 4 and 8 seconds between | ||||
| # attempts, and returns the status of the last attempt. | ||||
| retry () { | ||||
|     # "$@" (not $*) re-runs the command with its original argument boundaries, | ||||
|     # so arguments containing spaces or glob characters are passed unchanged. | ||||
|     "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@") | ||||
| } | ||||
|  | ||||
| # TODO move this into the Docker images | ||||
| # Install zip and openssl with the package manager that matches the NAME field | ||||
| # of /etc/os-release; any other distro is left untouched. | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| case "$OS_NAME" in | ||||
|     *"CentOS Linux"* | *"AlmaLinux"*) | ||||
|         retry yum install -q -y zip openssl | ||||
|         ;; | ||||
|     *"Red Hat Enterprise Linux"*) | ||||
|         retry dnf install -q -y zip openssl | ||||
|         ;; | ||||
|     *"Ubuntu"*) | ||||
|         # TODO: Remove this once nvidia package repos are back online | ||||
|         # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968 | ||||
|         # shellcheck disable=SC2046 | ||||
|         sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list") | ||||
|         retry apt-get update | ||||
|         retry apt-get -y install zip openssl | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| # Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if | ||||
| # PYTORCH_BUILD_NUMBER > 1 | ||||
| if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then | ||||
|     # This will be the *exact* version, since build_number<1 | ||||
|     build_version="$OVERRIDE_PACKAGE_VERSION" | ||||
|     build_number=0 | ||||
| else | ||||
|     build_version="$PYTORCH_BUILD_VERSION" | ||||
|     build_number="$PYTORCH_BUILD_NUMBER" | ||||
| fi | ||||
| # Fall back to 1.0.0 / 1 when nothing was supplied | ||||
| build_version="${build_version:-1.0.0}" | ||||
| build_number="${build_number:-1}" | ||||
| export PYTORCH_BUILD_VERSION=$build_version | ||||
| export PYTORCH_BUILD_NUMBER=$build_number | ||||
|  | ||||
| # Put the /opt/intel (and /lib) locations in front of any CMake search paths | ||||
| # that are already configured | ||||
| export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH" | ||||
| export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH" | ||||
|  | ||||
| # Point the build at /opt/openssl whenever that path exists | ||||
| if [[ -e /opt/openssl ]]; then | ||||
|     export OPENSSL_ROOT_DIR=/opt/openssl | ||||
|     export CMAKE_INCLUDE_PATH="/opt/openssl/include:$CMAKE_INCLUDE_PATH" | ||||
| fi | ||||
|  | ||||
| # The binary CI jobs pass a single python version such as 3.6m or 2.7mu. | ||||
| # Anything that is non-empty and not already in cpXY form is converted to the | ||||
| # cpXY-cpXY directory name used under /opt/python; DESIRED_PYTHON is assumed | ||||
| # not to be a list in that case. | ||||
| case "$DESIRED_PYTHON" in | ||||
|     "" | cp*) | ||||
|         ;; | ||||
|     *) | ||||
|         python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)" | ||||
|         DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}" | ||||
|         ;; | ||||
| esac | ||||
| pydir="/opt/python/$DESIRED_PYTHON" | ||||
| export PATH="$pydir/bin:$PATH" | ||||
|  | ||||
| # Refuse to continue with patchelf 0.9 | ||||
| export PATCHELF_BIN=/usr/local/bin/patchelf | ||||
| patchelf_version=$($PATCHELF_BIN --version) | ||||
| echo "patchelf version: " $patchelf_version | ||||
| if [[ "$patchelf_version" == "patchelf 0.9" ]]; then | ||||
|     echo "Your patchelf version is too old. Please use version >= 0.10." | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| ######################################################## | ||||
| # Compile wheels as well as libtorch | ||||
| ####################################################### | ||||
| if [[ -z "$PYTORCH_ROOT" ]]; then | ||||
|     echo "Need to set PYTORCH_ROOT env variable" | ||||
|     exit 1 | ||||
| fi | ||||
| # Everything up to the matching popd runs from the source checkout | ||||
| pushd "$PYTORCH_ROOT" | ||||
| python setup.py clean | ||||
| retry pip install -qr requirements.txt | ||||
| retry pip install -q numpy==2.0.1 | ||||
|  | ||||
| # Select the libstdc++ ABI from the devtoolset name | ||||
| case "$DESIRED_DEVTOOLSET" in | ||||
|     *"cxx11-abi"*) | ||||
|         export _GLIBCXX_USE_CXX11_ABI=1 | ||||
|         ;; | ||||
|     *) | ||||
|         export _GLIBCXX_USE_CXX11_ABI=0 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| # ROCm builds run tools/amd_build/build_amd.py before compiling | ||||
| case "$DESIRED_CUDA" in | ||||
|     *"rocm"*) | ||||
|         echo "Calling build_amd.py at $(date)" | ||||
|         python tools/amd_build/build_amd.py | ||||
|         # TODO remove this work-around once pytorch sources are updated | ||||
|         export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| echo "Calling setup.py install at $(date)" | ||||
|  | ||||
| # Static libtorch variants get an extra cmake flag | ||||
| if [[ $LIBTORCH_VARIANT == *"static"* ]]; then | ||||
|     STATIC_CMAKE_FLAG="-DTORCH_STATIC=1" | ||||
| fi | ||||
|  | ||||
| # Build with setup.py, then assemble the libtorch/ tree (lib, include, share) | ||||
| # and split the debug symbols of libtorch_cpu.so out into debug/. Runs in a | ||||
| # subshell so `set -x` and the directory changes stay local. | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|     mkdir -p build | ||||
|  | ||||
|     # TODO: Remove the CFLAGS override once https://github.com/pytorch/pytorch/issues/55952 is closed | ||||
|     # NOTE: keep comments out of the continuation lines below. A comment placed | ||||
|     # between them ends the command early; that previously turned CMAKE_ARGS and | ||||
|     # EXTRA_CAFFE2_CMAKE_FLAGS into plain shell assignments that setup.py never | ||||
|     # received, and made `time` measure nothing. | ||||
|     time CMAKE_ARGS=${CMAKE_ARGS[@]} \ | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \ | ||||
|         CFLAGS='-Wno-deprecated-declarations' \ | ||||
|         BUILD_LIBTORCH_CPU_WITH_DEBUG=1 \ | ||||
|         python setup.py install | ||||
|  | ||||
|     mkdir -p libtorch/{lib,bin,include,share} | ||||
|  | ||||
|     # Make debug folder separate so it doesn't get zipped up with the rest of | ||||
|     # libtorch | ||||
|     mkdir debug | ||||
|  | ||||
|     # Copy over all lib files | ||||
|     cp -rv build/lib/*                libtorch/lib/ | ||||
|     cp -rv build/lib*/torch/lib/*     libtorch/lib/ | ||||
|  | ||||
|     # Copy over all include files | ||||
|     cp -rv build/include/*            libtorch/include/ | ||||
|     cp -rv build/lib*/torch/include/* libtorch/include/ | ||||
|  | ||||
|     # Copy over all of the cmake files | ||||
|     cp -rv build/lib*/torch/share/*   libtorch/share/ | ||||
|  | ||||
|     # Split libtorch into debug / release version | ||||
|     cp libtorch/lib/libtorch_cpu.so libtorch/lib/libtorch_cpu.so.dbg | ||||
|  | ||||
|     # Keep debug symbols on debug lib | ||||
|     strip --only-keep-debug libtorch/lib/libtorch_cpu.so.dbg | ||||
|  | ||||
|     # Remove debug info from release lib | ||||
|     strip --strip-debug libtorch/lib/libtorch_cpu.so | ||||
|  | ||||
|     # Add a debug link to the release lib to the debug lib (debuggers will then | ||||
|     # search for symbols in a file called libtorch_cpu.so.dbg in some | ||||
|     # predetermined locations) and embed a CRC32 of the debug library into the .so | ||||
|     cd libtorch/lib | ||||
|  | ||||
|     objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg | ||||
|     cd ../.. | ||||
|  | ||||
|     # Move the debug symbols to its own directory so it doesn't get processed / | ||||
|     # zipped with all the other libraries | ||||
|     mv libtorch/lib/libtorch_cpu.so.dbg debug/libtorch_cpu.so.dbg | ||||
|  | ||||
|     echo "${PYTORCH_BUILD_VERSION}" > libtorch/build-version | ||||
|     # Use cd, not pushd: pushd prints the directory stack on stdout, which | ||||
|     # ended up in build-hash ahead of the commit hash. | ||||
|     echo "$(cd "$PYTORCH_ROOT" && git rev-parse HEAD)" > libtorch/build-hash | ||||
|  | ||||
| ) | ||||
|  | ||||
| # Archive names carry a "cxx11-abi-" infix when the devtoolset name asks for | ||||
| # the cxx11 ABI, and nothing otherwise | ||||
| LIBTORCH_ABI= | ||||
| if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     LIBTORCH_ABI="cxx11-abi-" | ||||
| fi | ||||
|  | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|     staging_dir=/tmp/$LIBTORCH_HOUSE_DIR | ||||
|     archive_stem=libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT | ||||
|     mkdir -p $staging_dir | ||||
|  | ||||
|     # objcopy installs a CRC32 into libtorch_cpu above so, so add that to the name here | ||||
|     CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch/lib/libtorch_cpu.so) | ||||
|  | ||||
|     # Zip debug symbols | ||||
|     zip $staging_dir/debug-$archive_stem-$PYTORCH_BUILD_VERSION-$CRC32.zip debug/libtorch_cpu.so.dbg | ||||
|  | ||||
|     # Zip libtorch, then duplicate the archive under the "-latest" name | ||||
|     zip -rq $staging_dir/$archive_stem-$PYTORCH_BUILD_VERSION.zip libtorch | ||||
|     cp $staging_dir/$archive_stem-$PYTORCH_BUILD_VERSION.zip \ | ||||
|        $staging_dir/$archive_stem-latest.zip | ||||
| ) | ||||
|  | ||||
|  | ||||
| popd | ||||
|  | ||||
| ####################################################################### | ||||
| # ADD DEPENDENCIES INTO THE WHEEL | ||||
| # | ||||
| # auditwheel repair doesn't work correctly and is buggy | ||||
| # so manually do the work of copying dependency libs and patchelfing | ||||
| # and fixing RECORDS entries correctly | ||||
| ###################################################################### | ||||
|  | ||||
| # Usage: fname_with_sha256 <path> | ||||
| # Prints <path> with the first 8 hex digits of the file's SHA-256 inserted | ||||
| # before the first "." of its basename (lib/libfoo.so.1 -> lib/libfoo-<hash>.so.1). | ||||
| # libnvrtc-builtins.so and libcudnn* are printed unchanged. | ||||
| fname_with_sha256() { | ||||
|     local path=$1 | ||||
|     local dir base hash | ||||
|     dir=$(dirname "$path") | ||||
|     base=$(basename "$path") | ||||
|     if [[ $base == "libnvrtc-builtins.so" || $base == "libcudnn"* ]]; then | ||||
|         echo "$path" | ||||
|     else | ||||
|         # Hash only when the name is actually rewritten; quoting keeps paths | ||||
|         # with whitespace intact | ||||
|         hash=$(sha256sum "$path" | cut -c1-8) | ||||
|         # ${base%%.*} is the text before the first dot, ${base#*.} the rest | ||||
|         echo "$dir/${base%%.*}-$hash.${base#*.}" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| # Usage: fname_without_so_number <path> | ||||
| # Prints <path> with everything after the first ".so" removed | ||||
| # (libfoo.so.1.2 -> libfoo.so). A path without ".so" is printed unchanged. | ||||
| fname_without_so_number() { | ||||
|     local linkname | ||||
|     # printf with a quoted argument keeps whitespace and glob characters in | ||||
|     # the path exactly as given | ||||
|     linkname=$(printf '%s\n' "$1" | sed -e 's/\.so.*/.so/g') | ||||
|     echo "$linkname" | ||||
| } | ||||
|  | ||||
| # Usage: make_wheel_record <path> | ||||
| # Prints one RECORD line for <path>: "<path>",sha256=<digest>,<size>, where the | ||||
| # digest is the base64 SHA-256 with + and / replaced by - and _ and the = | ||||
| # padding removed. A path containing "RECORD" gets empty hash and size fields. | ||||
| make_wheel_record() { | ||||
|     local fpath=$1 | ||||
|     local hash fsize | ||||
|     if [[ $fpath == *RECORD* ]]; then | ||||
|         # if the RECORD file, then | ||||
|         echo "\"$fpath\",," | ||||
|     else | ||||
|         # The path is quoted throughout so names with spaces are hashed and | ||||
|         # measured correctly | ||||
|         hash=$(openssl dgst -sha256 -binary "$fpath" | openssl base64 | tr '+/' '-_' | tr -d '=') | ||||
|         fsize=$(ls -nl "$fpath" | awk '{print $5}') | ||||
|         echo "\"$fpath\",sha256=$hash,$fsize" | ||||
|     fi | ||||
| } | ||||
|  | ||||
| echo 'Built this package:' | ||||
| # Move the freshly zipped archives out of /tmp into /$LIBTORCH_HOUSE_DIR | ||||
| ( | ||||
|     set -x | ||||
|     house_dir=/$LIBTORCH_HOUSE_DIR | ||||
|     staging_dir=/tmp/$LIBTORCH_HOUSE_DIR | ||||
|     mkdir -p $house_dir | ||||
|     mv $staging_dir/*.zip $house_dir | ||||
|     rm -rf $staging_dir | ||||
| ) | ||||
| # Scratch directory for the repacking below; removed again when the shell exits | ||||
| TMP_DIR=$(mktemp -d) | ||||
| trap "rm -rf ${TMP_DIR}" EXIT | ||||
| pushd "${TMP_DIR}" | ||||
|  | ||||
| # Post-process every libtorch archive in /$LIBTORCH_HOUSE_DIR: unpack it into | ||||
| # ./tmp, bundle the DEPS_LIST libraries (unless the archive name contains | ||||
| # "without-deps"), rewrite needed-library entries and RPATHs with patchelf, | ||||
| # then zip the tree back over the original archive. | ||||
| for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do | ||||
|  | ||||
|     # if the glob didn't match anything | ||||
|     if [[ ! -e $pkg ]]; then | ||||
|         continue | ||||
|     fi | ||||
|  | ||||
|     # Start from an empty working directory for each archive | ||||
|     rm -rf tmp | ||||
|     mkdir -p tmp | ||||
|     cd tmp | ||||
|     cp $pkg . | ||||
|  | ||||
|     unzip -q $(basename $pkg) | ||||
|     rm -f $(basename $pkg) | ||||
|  | ||||
|     # Top-level directory inside the archive | ||||
|     PREFIX=libtorch | ||||
|  | ||||
|     if [[ $pkg != *"without-deps"* ]]; then | ||||
|         # copy over needed dependent .so files over and tag them with their hash | ||||
|         # patched[i] ends up holding the bundled file name for DEPS_LIST[i] | ||||
|         patched=() | ||||
|         for filepath in "${DEPS_LIST[@]}"; do | ||||
|             filename=$(basename $filepath) | ||||
|             destpath=$PREFIX/lib/$filename | ||||
|             if [[ "$filepath" != "$destpath" ]]; then | ||||
|                 cp $filepath $destpath | ||||
|             fi | ||||
|  | ||||
|             # ROCm libraries lose their .so version suffix; all others get a | ||||
|             # content hash inserted into the name | ||||
|             if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|                 patchedpath=$(fname_without_so_number $destpath) | ||||
|             else | ||||
|                 patchedpath=$(fname_with_sha256 $destpath) | ||||
|             fi | ||||
|             patchedname=$(basename $patchedpath) | ||||
|             if [[ "$destpath" != "$patchedpath" ]]; then | ||||
|                 mv $destpath $patchedpath | ||||
|             fi | ||||
|             patched+=("$patchedname") | ||||
|             echo "Copied $filepath to $patchedpath" | ||||
|         done | ||||
|  | ||||
|         echo "patching to fix the so names to the hashed names" | ||||
|         # DEPS_SONAME[i] is looked up alongside patched[i], so DEPS_SONAME is | ||||
|         # expected to list its entries in the same order as DEPS_LIST | ||||
|         for ((i=0;i<${#DEPS_LIST[@]};++i)); do | ||||
|             find $PREFIX -name '*.so*' | while read sofile; do | ||||
|                 origname=${DEPS_SONAME[i]} | ||||
|                 patchedname=${patched[i]} | ||||
|                 if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|                     # grep exits non-zero when $sofile does not need this | ||||
|                     # library, so errexit is suspended around the lookup | ||||
|                     set +e | ||||
|                     origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*") | ||||
|                     ERRCODE=$? | ||||
|                     set -e | ||||
|                     if [ "$ERRCODE" -eq "0" ]; then | ||||
|                         echo "patching $sofile entry $origname to $patchedname" | ||||
|                         $PATCHELF_BIN --replace-needed $origname $patchedname $sofile | ||||
|                     fi | ||||
|                 fi | ||||
|             done | ||||
|         done | ||||
|  | ||||
|         # copy over needed auxiliary files | ||||
|         # DEPS_AUX_SRCLIST[i] is copied to $PREFIX/DEPS_AUX_DSTLIST[i] | ||||
|         for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do | ||||
|             srcpath=${DEPS_AUX_SRCLIST[i]} | ||||
|             dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]} | ||||
|             mkdir -p $(dirname $dstpath) | ||||
|             cp $srcpath $dstpath | ||||
|         done | ||||
|     fi | ||||
|  | ||||
|     # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib | ||||
|     find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do | ||||
|         echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib' | ||||
|         $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile | ||||
|         $PATCHELF_BIN --print-rpath $sofile | ||||
|     done | ||||
|  | ||||
|     # set RPATH of lib/ files to $ORIGIN | ||||
|     find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do | ||||
|         echo "Setting rpath of $sofile to " '$ORIGIN' | ||||
|         $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile | ||||
|         $PATCHELF_BIN --print-rpath $sofile | ||||
|     done | ||||
|  | ||||
|     # regenerate the RECORD file with new hashes | ||||
|     # NOTE(review): the path is derived by replacing "-cp..." in the archive | ||||
|     # name; libtorch-*.zip names presumably never contain "-cp", so this | ||||
|     # section looks inherited from the wheel flow -- confirm it is still needed | ||||
|     record_file=`echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g'` | ||||
|     if [[ -e $record_file ]]; then | ||||
|         echo "Generating new record file $record_file" | ||||
|         rm -f $record_file | ||||
|         # generate records for folders in wheel | ||||
|         find * -type f | while read fname; do | ||||
|             echo $(make_wheel_record $fname) >>$record_file | ||||
|         done | ||||
|     fi | ||||
|  | ||||
|     # zip up the wheel back | ||||
|     zip -rq $(basename $pkg) $PREFIX* | ||||
|  | ||||
|     # replace original wheel | ||||
|     rm -f $pkg | ||||
|     mv $(basename $pkg) $pkg | ||||
|     cd .. | ||||
|     rm -rf tmp | ||||
| done | ||||
|  | ||||
| # Copy wheels to host machine for persistence before testing | ||||
| # (the libtorch archives first, then the debug-symbol archives) | ||||
| if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     for archive_prefix in libtorch debug-libtorch; do | ||||
|         cp /$LIBTORCH_HOUSE_DIR/${archive_prefix}*.zip "$PYTORCH_FINAL_PACKAGE_DIR" | ||||
|     done | ||||
| fi | ||||
| @ -1,291 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # ROCm install prefix and the MAGMA tree inside it | ||||
| export ROCM_HOME=/opt/rocm | ||||
| export MAGMA_HOME="${ROCM_HOME}/magma" | ||||
| # TODO: libtorch_cpu.so is broken when building with Debug info | ||||
| export BUILD_DEBUG_INFO=0 | ||||
|  | ||||
| # TODO Are these all used/needed? | ||||
| export TH_BINARY_BUILD=1 | ||||
| export USE_STATIC_CUDNN=1 | ||||
| export USE_STATIC_NCCL=1 | ||||
| export ATEN_STATIC_CUDA=1 | ||||
| export USE_CUDA_STATIC_LINK=1 | ||||
| # Don't install test binaries into site-packages | ||||
| export INSTALL_TEST=0 | ||||
| # Set RPATH instead of RUNPATH when using patchelf to avoid LD_LIBRARY_PATH override | ||||
| export FORCE_RPATH="--force-rpath" | ||||
|  | ||||
| # Keep an array of cmake variables to add to; each one starts out empty unless | ||||
| # the caller already provided a value | ||||
| # CMAKE_ARGS is passed to tools/build_pytorch_libs.sh::build() | ||||
| [[ -n "$CMAKE_ARGS" ]] || CMAKE_ARGS=() | ||||
| # EXTRA_CAFFE2_CMAKE_FLAGS is passed to tools/build_pytorch_libs.sh::build_caffe2() | ||||
| [[ -n "$EXTRA_CAFFE2_CMAKE_FLAGS" ]] || EXTRA_CAFFE2_CMAKE_FLAGS=() | ||||
|  | ||||
| # Determine ROCm version and architectures to build for | ||||
| # | ||||
| # NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION` | ||||
| if [[ -z "$DESIRED_CUDA" ]]; then | ||||
|     echo "Must set DESIRED_CUDA" | ||||
|     exit 1 | ||||
| fi | ||||
| # Normalize to the "rocm<version>" spelling | ||||
| if [[ "${DESIRED_CUDA}" != rocm* ]]; then | ||||
|     export DESIRED_CUDA="rocm${DESIRED_CUDA}" | ||||
| fi | ||||
| # rocm3.7, rocm3.5.1 | ||||
| ROCM_VERSION="$DESIRED_CUDA" | ||||
| echo "Using $ROCM_VERSION as determined by DESIRED_CUDA" | ||||
|  | ||||
| # Package directories | ||||
| WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_house$ROCM_VERSION" | ||||
| # Default output location under /remote: the wheelhouse for python builds, the | ||||
| # libtorch house for python-less ones | ||||
| if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     if [[ -n "$BUILD_PYTHONLESS" ]]; then | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/$LIBTORCH_HOUSE_DIR" | ||||
|     else | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/$WHEELHOUSE_DIR" | ||||
|     fi | ||||
| fi | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|  | ||||
| # To make version comparison easier, create an integer representation: | ||||
| # ROCM_INT = major * 10000 + minor * 100 + patch (patch is 0 when not given). | ||||
| ROCM_VERSION_CLEAN=${ROCM_VERSION/rocm/} | ||||
| # Split on dots without touching the global IFS | ||||
| IFS=. read -r -a ROCM_VERSION_ARRAY <<< "$ROCM_VERSION_CLEAN" | ||||
| case ${#ROCM_VERSION_ARRAY[@]} in | ||||
|     2) | ||||
|         ROCM_VERSION_PATCH=0 | ||||
|         ;; | ||||
|     3) | ||||
|         ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]} | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Unhandled ROCM_VERSION ${ROCM_VERSION}" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
| ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]} | ||||
| ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]} | ||||
| ROCM_INT=$((ROCM_VERSION_MAJOR * 10000 + ROCM_VERSION_MINOR * 100 + ROCM_VERSION_PATCH)) | ||||
|  | ||||
| # Required ROCm libraries | ||||
| ROCM_SO_FILES=( | ||||
|     "libMIOpen.so" | ||||
|     "libamdhip64.so" | ||||
|     "libhipblas.so" | ||||
|     "libhipfft.so" | ||||
|     "libhiprand.so" | ||||
|     "libhipsolver.so" | ||||
|     "libhipsparse.so" | ||||
|     "libhsa-runtime64.so" | ||||
|     "libamd_comgr.so" | ||||
|     "libmagma.so" | ||||
|     "librccl.so" | ||||
|     "librocblas.so" | ||||
|     "librocfft.so" | ||||
|     "librocm_smi64.so" | ||||
|     "librocrand.so" | ||||
|     "librocsolver.so" | ||||
|     "librocsparse.so" | ||||
|     "libroctracer64.so" | ||||
|     "libroctx64.so" | ||||
|     "libhipblaslt.so" | ||||
|     "libhiprtc.so" | ||||
| ) | ||||
|  | ||||
| # Added for ROCm 6.1 and newer | ||||
| if (( ROCM_INT >= 60100 )); then | ||||
|     ROCM_SO_FILES+=("librocprofiler-register.so") | ||||
| fi | ||||
|  | ||||
| # Added for ROCm 6.2 and newer | ||||
| if (( ROCM_INT >= 60200 )); then | ||||
|     ROCM_SO_FILES+=("librocm-core.so") | ||||
| fi | ||||
|  | ||||
| # Pick the path of each OS-provided shared library to bundle. The locations | ||||
| # (and some .so versions) depend on the distro named in /etc/os-release and, | ||||
| # for the libhipsolver dependency chain, on the ROCm version. On any other | ||||
| # distro none of the LIB*_PATH variables are set. | ||||
| OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|     LIBNUMA_PATH="/usr/lib64/libnuma.so.1" | ||||
|     LIBELF_PATH="/usr/lib64/libelf.so.1" | ||||
|     # CentOS uses libtinfo.so.5, AlmaLinux libtinfo.so.6 | ||||
|     if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then | ||||
|         LIBTINFO_PATH="/usr/lib64/libtinfo.so.5" | ||||
|     else | ||||
|         LIBTINFO_PATH="/usr/lib64/libtinfo.so.6" | ||||
|     fi | ||||
|     LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2" | ||||
|     LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1" | ||||
|     # The libraries below are only bundled for ROCm 6.1 and newer | ||||
|     if [[ $ROCM_INT -ge 60100 ]]; then | ||||
|         # Below libs are direct dependencies of libhipsolver | ||||
|         LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4" | ||||
|         if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then | ||||
|             LIBCHOLMOD_PATH="/lib64/libcholmod.so.2" | ||||
|             # Below libs are direct dependencies of libsatlas | ||||
|             LIBGFORTRAN_PATH="/lib64/libgfortran.so.3" | ||||
|         else | ||||
|             LIBCHOLMOD_PATH="/lib64/libcholmod.so.3" | ||||
|             # Below libs are direct dependencies of libsatlas | ||||
|             LIBGFORTRAN_PATH="/lib64/libgfortran.so.5" | ||||
|         fi | ||||
|         # Below libs are direct dependencies of libcholmod | ||||
|         LIBAMD_PATH="/lib64/libamd.so.2" | ||||
|         LIBCAMD_PATH="/lib64/libcamd.so.2" | ||||
|         LIBCCOLAMD_PATH="/lib64/libccolamd.so.2" | ||||
|         LIBCOLAMD_PATH="/lib64/libcolamd.so.2" | ||||
|         LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3" | ||||
|         # Below libs are direct dependencies of libsatlas | ||||
|         LIBQUADMATH_PATH="/lib64/libquadmath.so.0" | ||||
|     fi | ||||
|     MAYBE_LIB64=lib64 | ||||
| elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" | ||||
|     LIBNUMA_PATH="/usr/lib/x86_64-linux-gnu/libnuma.so.1" | ||||
|     LIBELF_PATH="/usr/lib/x86_64-linux-gnu/libelf.so.1" | ||||
|     # libtinfo.so.6 from ROCm 5.3 onwards, libtinfo.so.5 before that | ||||
|     if [[ $ROCM_INT -ge 50300 ]]; then | ||||
|         LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.6" | ||||
|     else | ||||
|         LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.5" | ||||
|     fi | ||||
|     LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2" | ||||
|     LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1" | ||||
|     # The libraries below are only bundled for ROCm 6.1 and newer | ||||
|     if [[ $ROCM_INT -ge 60100 ]]; then | ||||
|         # Below libs are direct dependencies of libhipsolver | ||||
|         LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3" | ||||
|         # Below libs are direct dependencies of libcholmod | ||||
|         LIBSUITESPARSE_CONFIG_PATH="/lib/x86_64-linux-gnu/libsuitesparseconfig.so.5" | ||||
|         LIBAMD_PATH="/lib/x86_64-linux-gnu/libamd.so.2" | ||||
|         LIBCAMD_PATH="/lib/x86_64-linux-gnu/libcamd.so.2" | ||||
|         LIBCCOLAMD_PATH="/lib/x86_64-linux-gnu/libccolamd.so.2" | ||||
|         LIBCOLAMD_PATH="/lib/x86_64-linux-gnu/libcolamd.so.2" | ||||
|         LIBMETIS_PATH="/lib/x86_64-linux-gnu/libmetis.so.5" | ||||
|         LIBLAPACK_PATH="/lib/x86_64-linux-gnu/liblapack.so.3" | ||||
|         LIBBLAS_PATH="/lib/x86_64-linux-gnu/libblas.so.3" | ||||
|         # Below libs are direct dependencies of libblas | ||||
|         LIBGFORTRAN_PATH="/lib/x86_64-linux-gnu/libgfortran.so.5" | ||||
|         LIBQUADMATH_PATH="/lib/x86_64-linux-gnu/libquadmath.so.0" | ||||
|     fi | ||||
|     MAYBE_LIB64=lib | ||||
| fi | ||||
| # Every OS-level library path selected above. The expansions are deliberately | ||||
| # unquoted: a variable that was not set for this distro / ROCm version | ||||
| # expands to nothing and drops out of the array. | ||||
| OS_SO_PATHS=( | ||||
|     $LIBGOMP_PATH $LIBNUMA_PATH | ||||
|     $LIBELF_PATH $LIBTINFO_PATH | ||||
|     $LIBDRM_PATH $LIBDRM_AMDGPU_PATH | ||||
|     $LIBSUITESPARSE_CONFIG_PATH | ||||
|     $LIBCHOLMOD_PATH $LIBAMD_PATH | ||||
|     $LIBCAMD_PATH $LIBCCOLAMD_PATH | ||||
|     $LIBCOLAMD_PATH $LIBSATLAS_PATH | ||||
|     $LIBGFORTRAN_PATH $LIBQUADMATH_PATH | ||||
|     $LIBMETIS_PATH $LIBLAPACK_PATH | ||||
|     $LIBBLAS_PATH | ||||
| ) | ||||
| # OS_SO_FILES holds the bare file name of each path, in the same order | ||||
| OS_SO_FILES=() | ||||
| for lib in "${OS_SO_PATHS[@]}"; do | ||||
|     file_name="${lib##*/}" # Strip the directory part | ||||
|     OS_SO_FILES+=("$file_name") | ||||
| done | ||||
|  | ||||
| # FIXME: Temporary until https://github.com/pytorch/pytorch/pull/137443 lands | ||||
| # Install AOTriton when the checkout ships an aotriton_version.txt, and add its | ||||
| # shared library to the list of ROCm libraries to bundle | ||||
| aotriton_version_file=${PYTORCH_ROOT}/.ci/docker/aotriton_version.txt | ||||
| if [ -e $aotriton_version_file ]; then | ||||
|     cp -a $aotriton_version_file aotriton_version.txt | ||||
|     # NOTE(review): because of the && list, errexit does not abort the script | ||||
|     # when the installer fails -- confirm that best-effort behavior is intended | ||||
|     bash ${PYTORCH_ROOT}/.ci/docker/common/install_aotriton.sh ${ROCM_HOME} && rm aotriton_version.txt | ||||
|     export AOTRITON_INSTALLED_PREFIX=${ROCM_HOME}/aotriton | ||||
|     ROCM_SO_FILES+=("libaotriton_v2.so") | ||||
| fi | ||||
|  | ||||
| # rocBLAS library files | ||||
| # Keep the files whose names match one of the requested architectures plus | ||||
| # every file whose name does not contain "gfx". | ||||
| ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library | ||||
| ROCBLAS_LIB_DST=lib/rocblas/library | ||||
| ARCH=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/|/g') # Turn the ;-separated arch list into a |-separated alternation for grep -E | ||||
| ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH) | ||||
| OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx) | ||||
| ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES) | ||||
|  | ||||
| # hipblaslt library files | ||||
| # Same selection as for rocBLAS; ARCH_SPECIFIC_FILES and OTHER_FILES are reused | ||||
| HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library | ||||
| HIPBLASLT_LIB_DST=lib/hipblaslt/library | ||||
| ARCH_SPECIFIC_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -E $ARCH) | ||||
| OTHER_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -v gfx) | ||||
| HIPBLASLT_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES) | ||||
|  | ||||
| # ROCm library files | ||||
| # Resolve every ROCM_SO_FILES entry to a path: look under lib/ first, then | ||||
| # under lib64/ (when that directory exists), then anywhere below ROCM_HOME. | ||||
| # Only the first match is recorded; a library found nowhere aborts the build. | ||||
| ROCM_SO_PATHS=() | ||||
| for lib in "${ROCM_SO_FILES[@]}"; do | ||||
|     file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib | ||||
|     if [[ -z ${file_path[0]} && -d "$ROCM_HOME/lib64/" ]]; then | ||||
|         file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64 | ||||
|     fi | ||||
|     if [[ -z ${file_path[0]} ]]; then | ||||
|         file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME | ||||
|     fi | ||||
|     if [[ -z ${file_path[0]} ]]; then | ||||
|         echo "Error: Library file $lib is not found." >&2 | ||||
|         exit 1 | ||||
|     fi | ||||
|     ROCM_SO_PATHS+=("${file_path[0]}") | ||||
| done | ||||
|  | ||||
| # Libraries to bundle: ROCm libraries first, then the OS-provided ones. | ||||
| # DEPS_SONAME lists the matching file names in the same order. | ||||
| DEPS_LIST=(${ROCM_SO_PATHS[@]} ${OS_SO_PATHS[@]}) | ||||
|  | ||||
| DEPS_SONAME=(${ROCM_SO_FILES[@]} ${OS_SO_FILES[@]}) | ||||
|  | ||||
| # Auxiliary (non-library) files: each DEPS_AUX_SRCLIST entry has its | ||||
| # destination, relative to the package root, at the same index of | ||||
| # DEPS_AUX_DSTLIST. | ||||
| DEPS_AUX_SRCLIST=( | ||||
|     "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}" | ||||
|     "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_SRC/}" | ||||
|     "/opt/amdgpu/share/libdrm/amdgpu.ids" | ||||
| ) | ||||
|  | ||||
| DEPS_AUX_DSTLIST=( | ||||
|     "${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}" | ||||
|     "${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_DST/}" | ||||
|     "share/libdrm/amdgpu.ids" | ||||
| ) | ||||
|  | ||||
| # MIOpen library files | ||||
| # Only the db files whose names match a requested architecture are bundled | ||||
| MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db | ||||
| MIOPEN_SHARE_DST=share/miopen/db | ||||
| MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH)) | ||||
| for share_file in "${MIOPEN_SHARE_FILES[@]}"; do | ||||
|     DEPS_AUX_SRCLIST+=($MIOPEN_SHARE_SRC/$share_file) | ||||
| done | ||||
| for share_file in "${MIOPEN_SHARE_FILES[@]}"; do | ||||
|     DEPS_AUX_DSTLIST+=($MIOPEN_SHARE_DST/$share_file) | ||||
| done | ||||
|  | ||||
| # RCCL library files | ||||
| # Every entry of the msccl-algorithms directory is bundled | ||||
| RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms | ||||
| RCCL_SHARE_DST=share/rccl/msccl-algorithms | ||||
| RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC)) | ||||
| for share_file in "${RCCL_SHARE_FILES[@]}"; do | ||||
|     DEPS_AUX_SRCLIST+=($RCCL_SHARE_SRC/$share_file) | ||||
| done | ||||
| for share_file in "${RCCL_SHARE_FILES[@]}"; do | ||||
|     DEPS_AUX_DSTLIST+=($RCCL_SHARE_DST/$share_file) | ||||
| done | ||||
|  | ||||
| # PyTorch 2.6+ (AOTriton 0.8b+) | ||||
| # AKS = "AOTriton Kernel Storage", a file format to store GPU kernels compactly | ||||
| # | ||||
| # The version gate compares component-wise with `sort -V`: 2.6 is "<=" the | ||||
| # PyTorch version exactly when it sorts first. A plain awk `$1 >= $2` test | ||||
| # ranks 2.10 below 2.6 (numerically 2.1 < 2.6, and lexically "2.1" < "2.6"), | ||||
| # which would silently skip the AKS files for PyTorch 2.10 and later. | ||||
| # NOTE(review): with an empty PYTORCH_VERSION the previous awk test also took | ||||
| # this branch (the fields shifted), so that case is kept -- confirm it is wanted. | ||||
| if [[ -z "${PYTORCH_VERSION}" ]] || \ | ||||
|    [[ "$(printf '%s\n' "2.6" "${PYTORCH_VERSION}" | sort -V | head -n1)" == "2.6" ]]; then | ||||
|     # Directory containing libaotriton_v2.so: look in $ROCM_HOME/lib first, | ||||
|     # then fall back to searching all of $ROCM_HOME. | ||||
|     LIBAOTRITON_DIR=$(find "$ROCM_HOME/lib/" -name "libaotriton_v2.so" -printf '%h\n') | ||||
|     if [[ -z ${LIBAOTRITON_DIR} ]]; then | ||||
|         LIBAOTRITON_DIR=$(find "$ROCM_HOME/" -name "libaotriton_v2.so" -printf '%h\n') | ||||
|     fi | ||||
|     # %P prints each match relative to aotriton.images, so the same relative | ||||
|     # names can be prefixed with the source and destination roots below. | ||||
|     AKS_FILES=($(find "${LIBAOTRITON_DIR}/aotriton.images" -type f -name '*.aks?' -printf '%P\n')) | ||||
|     AKS_SRC="${LIBAOTRITON_DIR}/aotriton.images" | ||||
|     AKS_DST="lib/aotriton.images" | ||||
|     DEPS_AUX_SRCLIST+=(${AKS_FILES[@]/#/${AKS_SRC}/}) | ||||
|     DEPS_AUX_DSTLIST+=(${AKS_FILES[@]/#/${AKS_DST}/}) | ||||
| fi | ||||
|  | ||||
| echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}" | ||||
|  | ||||
| SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )" | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|     BUILD_SCRIPT=build_common.sh | ||||
| else | ||||
|     BUILD_SCRIPT=build_libtorch.sh | ||||
| fi | ||||
| source $SCRIPTPATH/${BUILD_SCRIPT} | ||||
| @ -1,108 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| export TH_BINARY_BUILD=1 | ||||
| export USE_CUDA=0 | ||||
|  | ||||
| # Keep an array of cmake variables to add to | ||||
| if [[ -z "$CMAKE_ARGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build() | ||||
|     CMAKE_ARGS=() | ||||
| fi | ||||
| if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then | ||||
|     # These are passed to tools/build_pytorch_libs.sh::build_caffe2() | ||||
|     EXTRA_CAFFE2_CMAKE_FLAGS=() | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html | ||||
| source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/pti/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/umf/latest/env/vars.sh | ||||
| export USE_STATIC_MKL=1 | ||||
|  | ||||
| WHEELHOUSE_DIR="wheelhousexpu" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_housexpu" | ||||
| if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then | ||||
|     if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousexpu" | ||||
|     else | ||||
|         PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housexpu" | ||||
|     fi | ||||
| fi | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|  | ||||
| # Choose the libgomp.so.1 location from the NAME entry of /etc/os-release. | ||||
| # LIBGOMP_PATH is left untouched when the distribution is not one listed here. | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| case "$OS_NAME" in | ||||
|     *"CentOS Linux"* | *"Red Hat Enterprise Linux"* | *"AlmaLinux"*) | ||||
|         # These distributions all use the /usr/lib64 location. | ||||
|         LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
|         ;; | ||||
|     *"Ubuntu"*) | ||||
|         # Ubuntu uses a per-architecture directory; s390x is special-cased and | ||||
|         # every other machine type gets the x86_64 path. | ||||
|         case "$(uname -m)" in | ||||
|             s390x) | ||||
|                 LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1" | ||||
|                 ;; | ||||
|             *) | ||||
|                 LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" | ||||
|                 ;; | ||||
|         esac | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| DEPS_LIST=( | ||||
|     "$LIBGOMP_PATH" | ||||
|     "/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1" | ||||
| ) | ||||
|  | ||||
| DEPS_SONAME=( | ||||
|     "libgomp.so.1" | ||||
|     "libOpenCL.so.1" | ||||
| ) | ||||
|  | ||||
| if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|     echo "Bundling with xpu support package libs." | ||||
|     DEPS_LIST+=( | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libsycl.so.8" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libur_loader.so.0" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libur_adapter_level_zero.so.0" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libur_adapter_opencl.so.0" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libsvml.so" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libirng.so" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libimf.so" | ||||
|         "/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5" | ||||
|         "/opt/intel/oneapi/pti/latest/lib/libpti_view.so.0.10" | ||||
|         "/opt/intel/oneapi/umf/latest/lib/libumf.so.0" | ||||
|         "/opt/intel/oneapi/tcm/latest/lib/libhwloc.so.15" | ||||
|     ) | ||||
|     DEPS_SONAME+=( | ||||
|         "libsycl.so.8" | ||||
|         "libur_loader.so.0" | ||||
|         "libur_adapter_level_zero.so.0" | ||||
|         "libur_adapter_opencl.so.0" | ||||
|         "libsvml.so" | ||||
|         "libirng.so" | ||||
|         "libimf.so" | ||||
|         "libintlc.so.5" | ||||
|         "libpti_view.so.0.10" | ||||
|         "libumf.so.0" | ||||
|         "libhwloc.so.15" | ||||
|     ) | ||||
| else | ||||
|     echo "Using xpu runtime libs from pypi." | ||||
|     XPU_RPATHS=( | ||||
|         '$ORIGIN/../../../..' | ||||
|     ) | ||||
|     XPU_RPATHS=$(IFS=: ; echo "${XPU_RPATHS[*]}") | ||||
|     export C_SO_RPATH=$XPU_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|     export LIB_SO_RPATH=$XPU_RPATHS':$ORIGIN' | ||||
|     export FORCE_RPATH="--force-rpath" | ||||
| fi | ||||
|  | ||||
| rm -rf /usr/local/cuda* | ||||
|  | ||||
| SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )" | ||||
| if [[ -z "$BUILD_PYTHONLESS" ]]; then | ||||
|     BUILD_SCRIPT=build_common.sh | ||||
| else | ||||
|     BUILD_SCRIPT=build_libtorch.sh | ||||
| fi | ||||
| source ${SOURCE_DIR}/${BUILD_SCRIPT} | ||||
| @ -1,30 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| # Require only one python installation | ||||
| if [[ -z "$DESIRED_PYTHON" ]]; then | ||||
|     echo "Need to set DESIRED_PYTHON env variable" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # If given a python version like 3.6m or 2.7mu, convert this to the format we | ||||
| # expect (cpXY-cpXY). A version carrying a trailing "t" (e.g. 3.13t) becomes | ||||
| # cpXY-cpXYt and py_majmin keeps the value exactly as given. The binary CI jobs | ||||
| # pass in python versions like this; they also only ever pass one python | ||||
| # version, so we assume that DESIRED_PYTHON is not a list in this case. | ||||
| # Values that already start with "cp" are left unchanged. | ||||
| if [[ -n "$DESIRED_PYTHON" && $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then | ||||
|     # The character class is quoted so the shell cannot glob-expand [:digit:] | ||||
|     # against file names in the current directory before tr receives it. | ||||
|     python_digits="$(echo "$DESIRED_PYTHON" | tr -cd '[:digit:]')" | ||||
|     py_majmin="${DESIRED_PYTHON}" | ||||
|     DESIRED_PYTHON="cp${python_digits}-cp${python_digits}t" | ||||
| elif [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then | ||||
|     # Strip the "m"/"u" suffix letters and the dot, e.g. 3.6m -> 36. | ||||
|     python_nodot="$(echo "$DESIRED_PYTHON" | tr -d m.u)" | ||||
|     DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}" | ||||
|     # Rebuild major.minor from the cpXY tag: two minor digits from 3.10 on, | ||||
|     # a single minor digit before that. | ||||
|     if [[ ${python_nodot} -ge 310 ]]; then | ||||
|         py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:2}" | ||||
|     else | ||||
|         py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:1}" | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| pydir="/opt/python/$DESIRED_PYTHON" | ||||
| export DESIRED_PYTHON_BIN_DIR="${pydir}/bin" | ||||
| export PATH="$DESIRED_PYTHON_BIN_DIR:$PATH" | ||||
| echo "Will build for Python version: ${DESIRED_PYTHON}" | ||||
| @ -1,26 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
| set -e | ||||
|  | ||||
| yum install -y wget git | ||||
|  | ||||
| rm -rf /usr/local/cuda* | ||||
|  | ||||
| # Install Miniconda into /py unless it is already present | ||||
| if ! ls /py | ||||
| then | ||||
|     echo "Miniconda needs to be installed" | ||||
|     wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh | ||||
|     bash ~/miniconda.sh -b -p /py | ||||
| else | ||||
|     echo "Miniconda is already installed" | ||||
| fi | ||||
|  | ||||
| export PATH="/py/bin:$PATH" | ||||
|  | ||||
| # Anaconda token | ||||
| if ls /remote/token | ||||
| then | ||||
|    source /remote/token | ||||
| fi | ||||
|  | ||||
| conda install -y conda-build anaconda-client | ||||
| @ -1,6 +1,6 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex -o pipefail | ||||
| set -ex | ||||
|  | ||||
| # Required environment variable: $BUILD_ENVIRONMENT | ||||
| # (This is set by default in the Docker images we build, so you don't | ||||
| @ -49,8 +49,13 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then | ||||
| fi | ||||
|  | ||||
| # Enable LLVM dependency for TensorExpr testing | ||||
| export USE_LLVM=/opt/llvm | ||||
| export LLVM_DIR=/opt/llvm/lib/cmake/llvm | ||||
| if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then | ||||
|   export USE_LLVM=/opt/rocm/llvm | ||||
|   export LLVM_DIR=/opt/rocm/llvm/lib/cmake/llvm | ||||
| else | ||||
|   export USE_LLVM=/opt/llvm | ||||
|   export LLVM_DIR=/opt/llvm/lib/cmake/llvm | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *executorch* ]]; then | ||||
|   # To build test_edge_op_registration | ||||
| @ -87,7 +92,7 @@ else | ||||
|  | ||||
|   # Workaround required for MKL library linkage | ||||
|   # https://github.com/pytorch/pytorch/issues/119557 | ||||
|   if [[ "$ANACONDA_PYTHON_VERSION" = "3.12" || "$ANACONDA_PYTHON_VERSION" = "3.13" ]]; then | ||||
|   if [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then | ||||
|     export CMAKE_LIBRARY_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/" | ||||
|     export CMAKE_INCLUDE_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/include/" | ||||
|   fi | ||||
| @ -178,7 +183,7 @@ fi | ||||
| # sccache will fail for CUDA builds if all cores are used for compiling | ||||
| # gcc 7 with sccache seems to have intermittent OOM issue if all cores are used | ||||
| if [ -z "$MAX_JOBS" ]; then | ||||
|   if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then | ||||
|   if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then | ||||
|     export MAX_JOBS=$(($(nproc) - 1)) | ||||
|   fi | ||||
| fi | ||||
| @ -191,7 +196,7 @@ fi | ||||
|  | ||||
| # We only build FlashAttention files for CUDA 8.0+, and they require large amounts of | ||||
| # memory to build and will OOM | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ "$TORCH_CUDA_ARCH_LIST" == *"8.6"* || "$TORCH_CUDA_ARCH_LIST" == *"8.0"* ]]; then | ||||
|   echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM" | ||||
|   echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage" | ||||
|   export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))" | ||||
| @ -203,12 +208,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|     export USE_CUDA=1 | ||||
|   fi | ||||
|   export LDSHARED="clang --shared" | ||||
|   export USE_CUDA=0 | ||||
|   export USE_ASAN=1 | ||||
|   export REL_WITH_DEB_INFO=1 | ||||
|   export UBSAN_FLAGS="-fno-sanitize-recover=all" | ||||
|   export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow" | ||||
|   unset USE_LLVM | ||||
| fi | ||||
|  | ||||
| @ -220,6 +223,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then | ||||
|     export USE_PRECOMPILED_HEADERS=1 | ||||
| fi | ||||
|  | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build*  ]]; then | ||||
|   export USE_GLOO_WITH_OPENSSL=ON | ||||
| fi | ||||
|  | ||||
| if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then | ||||
|   export BUILD_STATIC_RUNTIME_BENCHMARK=ON | ||||
| fi | ||||
| @ -230,7 +237,7 @@ fi | ||||
|  | ||||
| # Do not change workspace permissions for ROCm CI jobs | ||||
| # as it can leave workspace with bad permissions for cancelled jobs | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then | ||||
|   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96) | ||||
|   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace") | ||||
|   cleanup_workspace() { | ||||
| @ -250,6 +257,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then | ||||
|   set -e | ||||
|  | ||||
|   get_bazel | ||||
|   install_sccache_nvcc_for_bazel | ||||
|  | ||||
|   # Leave 1 CPU free and use only up to 80% of memory to reduce the chance of crashing | ||||
|   # the runner | ||||
| @ -284,7 +292,8 @@ else | ||||
|       WERROR=1 python setup.py clean | ||||
|  | ||||
|       if [[ "$USE_SPLIT_BUILD" == "true" ]]; then | ||||
|         python3 tools/packaging/split_wheel.py bdist_wheel | ||||
|         BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 python setup.py bdist_wheel | ||||
|         BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 python setup.py bdist_wheel --cmake | ||||
|       else | ||||
|         WERROR=1 python setup.py bdist_wheel | ||||
|       fi | ||||
| @ -336,11 +345,11 @@ else | ||||
|     CUSTOM_OP_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/custom-op-build" | ||||
|     CUSTOM_OP_TEST="$PWD/test/custom_operator" | ||||
|     python --version | ||||
|     SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')" | ||||
|     SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|  | ||||
|     mkdir -p "$CUSTOM_OP_BUILD" | ||||
|     pushd "$CUSTOM_OP_BUILD" | ||||
|     cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|     cmake "$CUSTOM_OP_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM" | ||||
|     make VERBOSE=1 | ||||
|     popd | ||||
| @ -350,10 +359,10 @@ else | ||||
|     JIT_HOOK_BUILD="${CUSTOM_TEST_ARTIFACT_BUILD_DIR}/jit-hook-build" | ||||
|     JIT_HOOK_TEST="$PWD/test/jit_hooks" | ||||
|     python --version | ||||
|     SITE_PACKAGES="$(python -c 'import site; print(";".join([x for x in site.getsitepackages()] + [x + "/torch" for x in site.getsitepackages()]))')" | ||||
|     SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|     mkdir -p "$JIT_HOOK_BUILD" | ||||
|     pushd "$JIT_HOOK_BUILD" | ||||
|     cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|     cmake "$JIT_HOOK_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM" | ||||
|     make VERBOSE=1 | ||||
|     popd | ||||
| @ -365,7 +374,7 @@ else | ||||
|     python --version | ||||
|     mkdir -p "$CUSTOM_BACKEND_BUILD" | ||||
|     pushd "$CUSTOM_BACKEND_BUILD" | ||||
|     cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|     cmake "$CUSTOM_BACKEND_TEST" -DCMAKE_PREFIX_PATH="$SITE_PACKAGES/torch;$SITE_PACKAGES" -DPython_EXECUTABLE="$(which python)" \ | ||||
|           -DCMAKE_MODULE_PATH="$CUSTOM_TEST_MODULE_PATH" -DUSE_ROCM="$CUSTOM_TEST_USE_ROCM" | ||||
|     make VERBOSE=1 | ||||
|     popd | ||||
| @ -395,7 +404,9 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; | ||||
|   # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build | ||||
|   python tools/stats/export_test_times.py | ||||
| fi | ||||
| # don't do this for bazel or s390x as they don't use sccache | ||||
| if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then | ||||
|  | ||||
| # snadampal: skipping it till sccache support added for aarch64 | ||||
| # https://github.com/pytorch/pytorch/issues/121559 | ||||
| if [[ "$BUILD_ENVIRONMENT" != *aarch64* ]]; then | ||||
|   print_sccache_stats | ||||
| fi | ||||
|  | ||||
| @ -1,394 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # shellcheck disable=SC2086,SC2006,SC2207,SC2076,SC2155,SC2046,SC1091,SC2143 | ||||
| # TODO: Re-enable shellchecks above | ||||
|  | ||||
| set -eux -o pipefail | ||||
|  | ||||
| # This script checks the following things on binaries | ||||
| # 1. The gcc abi matches DESIRED_DEVTOOLSET | ||||
| # 2. MacOS binaries do not link against OpenBLAS | ||||
| # 3. There are no protobuf symbols of any sort anywhere (turned off, because | ||||
| #    this is currently not true) | ||||
| # 4. Standard Python imports work | ||||
| # 5. MKL is available everywhere except for MacOS wheels | ||||
| # 6. XNNPACK is available everywhere except for MacOS wheels | ||||
| # 7. CUDA is setup correctly and does not hang | ||||
| # 8. Magma is available for CUDA builds | ||||
| # 9. CuDNN is available for CUDA builds | ||||
| # | ||||
| # This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA, | ||||
| # DESIRED_DEVTOOLSET and PACKAGE_TYPE | ||||
| # | ||||
| # This script expects PyTorch to be installed into the active Python (the | ||||
| # Python returned by `which python`). Or, if this is testing a libtorch | ||||
| # Pythonless binary, then it expects to be in the root folder of the unzipped | ||||
| # libtorch package. | ||||
|  | ||||
|  | ||||
| # For each required setting that is unset or empty, export it from the | ||||
| # corresponding MATRIX_* variable (which may itself be empty). | ||||
| [[ -n ${DESIRED_PYTHON:-} ]] || export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-} | ||||
| [[ -n ${DESIRED_CUDA:-} ]] || export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-} | ||||
| [[ -n ${DESIRED_DEVTOOLSET:-} ]] || export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-} | ||||
| [[ -n ${PACKAGE_TYPE:-} ]] || export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-} | ||||
|  | ||||
| # The install root depends on both the package type and the os | ||||
| # All MacOS packages use conda, even for the wheel packages. | ||||
| if [[ "$PACKAGE_TYPE" == libtorch ]]; then | ||||
|   # NOTE: Only $PWD works on both CentOS and Ubuntu | ||||
|   export install_root="$PWD" | ||||
| else | ||||
|  | ||||
|   if [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then | ||||
|     # For python that is maj.mint keep original version | ||||
|     py_dot="$DESIRED_PYTHON" | ||||
|   elif [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+) ]];  then | ||||
|     # Strip everything but major.minor from DESIRED_PYTHON version | ||||
|     py_dot="${BASH_REMATCH[0]}" | ||||
|   else | ||||
|     echo "Unexpected ${DESIRED_PYTHON} format" | ||||
|     exit 1 | ||||
|   fi | ||||
|   export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/" | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Setup XPU ENV | ||||
| ############################################################################### | ||||
| if [[ "$DESIRED_CUDA" == 'xpu' ]]; then | ||||
|   set +u | ||||
|   # Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html | ||||
|   source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
|   source /opt/intel/oneapi/pti/latest/env/vars.sh | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check GCC ABI | ||||
| ############################################################################### | ||||
|  | ||||
| # NOTE [ Building libtorch with old vs. new gcc ABI ] | ||||
| # | ||||
| # Packages built with one version of ABI could not be linked against by client | ||||
| # C++ libraries that were compiled using the other version of ABI. Since both | ||||
| # gcc ABIs are still common in the wild, we need to support both ABIs. Currently: | ||||
| # | ||||
| # - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI. | ||||
| # - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI. | ||||
|  | ||||
| echo "Checking that the gcc ABI is what we expect" | ||||
| if [[ "$(uname)" != 'Darwin' ]]; then | ||||
|   # Print 1 when the detected ABI value matches what this build should use; | ||||
|   # print nothing otherwise. | ||||
|   #   $1 - detected _GLIBCXX_USE_CXX11_ABI value (may be empty) | ||||
|   # When DESIRED_DEVTOOLSET contains "cxx11-abi" or DESIRED_CUDA contains "rocm", | ||||
|   # the value must be greater than 0 or equal "ON "; in every other case it | ||||
|   # must be empty, 0 or "OFF". | ||||
|   function is_expected() { | ||||
|     if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* || "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|       # NOTE(review): the literal compared here is "ON " with a trailing | ||||
|       # space -- confirm that is intended. | ||||
|       if [[ "$1" -gt 0 || "$1" == "ON " ]]; then | ||||
|         echo 1 | ||||
|       fi | ||||
|     else | ||||
|       if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then | ||||
|         echo 1 | ||||
|       fi | ||||
|     fi | ||||
|   } | ||||
|  | ||||
|   # First we check that the env var in TorchConfig.cmake is correct | ||||
|  | ||||
|   # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake | ||||
|   torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" | ||||
|   if [[ ! -f "$torch_config" ]]; then | ||||
|     echo "No TorchConfig.cmake found!" | ||||
|     ls -lah "$install_root/share/cmake/Torch" | ||||
|     exit 1 | ||||
|   fi | ||||
|   echo "Checking the TorchConfig.cmake" | ||||
|   cat "$torch_config" | ||||
|  | ||||
|   # The sed call below is | ||||
|   #   don't print lines by default (only print the line we want) | ||||
|   # -n | ||||
|   #   execute the following expression | ||||
|   # e | ||||
|   #   replace lines that match with the first capture group and print | ||||
|   # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p | ||||
|   #   any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a | ||||
|   #   quote, any characters | ||||
|   #   Note the exactly one single character after the '='. In the case that the | ||||
|   #     variable is not set the '=' will be followed by a '"' immediately and the | ||||
|   #     line will fail the match and nothing will be printed; this is what we | ||||
|   #     want.  Otherwise it will capture the 0 or 1 after the '='. | ||||
|   # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ | ||||
|   #   replace the matched line with the capture group and print | ||||
|   # /\1/p | ||||
|   actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" | ||||
|   if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then | ||||
|     echo "gcc ABI $actual_gcc_abi not as expected." | ||||
|     exit 1 | ||||
|   fi | ||||
|  | ||||
|   # We also check that there are [not] cxx11 symbols in libtorch | ||||
|   # | ||||
|   echo "Checking that symbols in libtorch.so have the right gcc abi" | ||||
|   python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py" | ||||
|  | ||||
|   echo "cxx11 symbols seem to be in order" | ||||
| fi # if on Darwin | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for no OpenBLAS | ||||
| # TODO Check for no Protobuf symbols (not finished) | ||||
| # Print *all* runtime dependencies | ||||
| ############################################################################### | ||||
| # We have to loop through all shared libraries for this | ||||
| if [[ "$(uname)" == 'Darwin' ]]; then | ||||
|   all_dylibs=($(find "$install_root" -name '*.dylib')) | ||||
|   for dylib in "${all_dylibs[@]}"; do | ||||
|     echo "All dependencies of $dylib are $(otool -L $dylib) with rpath $(otool -l $dylib | grep LC_RPATH -A2)" | ||||
|  | ||||
|     # Check that OpenBlas is not linked to on Macs | ||||
|     echo "Checking the OpenBLAS is not linked to" | ||||
|     if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then | ||||
|       echo "ERROR: Found openblas as a dependency of $dylib" | ||||
|       echo "Full dependencies is: $(otool -L $dylib)" | ||||
|       exit 1 | ||||
|     fi | ||||
|  | ||||
|     # Check for protobuf symbols | ||||
|     #proto_symbols="$(nm $dylib | grep protobuf)" || true | ||||
|     #if [[ -n "$proto_symbols" ]]; then | ||||
|     #  echo "ERROR: Detected protobuf symbols in $dylib" | ||||
|     #  echo "Symbols are $proto_symbols" | ||||
|     #  exit 1 | ||||
|     #fi | ||||
|   done | ||||
| else | ||||
|   all_libs=($(find "$install_root" -name '*.so')) | ||||
|   for lib in "${all_libs[@]}"; do | ||||
|     echo "All dependencies of $lib are $(ldd $lib) with runpath $(objdump -p $lib | grep RUNPATH)" | ||||
|  | ||||
|     # Check for protobuf symbols | ||||
|     #proto_symbols=$(nm $lib | grep protobuf) || true | ||||
|     #if [[ -n "$proto_symbols" ]]; then | ||||
|     #  echo "ERROR: Detected protobuf symbols in $lib" | ||||
|     #  echo "Symbols are $proto_symbols" | ||||
|     #  exit 1 | ||||
|     #fi | ||||
|   done | ||||
| fi | ||||
|  | ||||
| # Set the global linker-flag variables used to build the C++ examples against | ||||
| # the libraries in ${install_root}/lib: | ||||
| #   REF_LIB                 - rpath option pointing at ${install_root}/lib | ||||
| #   ADDITIONAL_LINKER_FLAGS - "-Wl,--no-as-needed" on Linux, empty otherwise | ||||
| #   C10_LINK_FLAGS          - "-lc10" if libc10 exists, else empty | ||||
| #   TORCH_CPU_LINK_FLAGS    - "-ltorch_cpu" if libtorch_cpu exists, else empty | ||||
| #   TORCH_CUDA_LINK_FLAGS   - "-ltorch_cuda" if libtorch_cuda exists; otherwise | ||||
| #                             "-ltorch_cuda_cpp -ltorch_cuda_cu" if BOTH split | ||||
| #                             libraries exist; else empty | ||||
| setup_link_flags () { | ||||
|   local libdir="${install_root}/lib" | ||||
|  | ||||
|   REF_LIB="-Wl,-R${install_root}/lib" | ||||
|   if [[ "$(uname)" == 'Darwin' ]]; then | ||||
|     REF_LIB="-Wl,-rpath ${install_root}/lib" | ||||
|   fi | ||||
|   ADDITIONAL_LINKER_FLAGS="" | ||||
|   if [[ "$(uname)" == 'Linux' ]]; then | ||||
|     ADDITIONAL_LINKER_FLAGS="-Wl,--no-as-needed" | ||||
|   fi | ||||
|   C10_LINK_FLAGS="" | ||||
|   if [ -f "${libdir}/libc10.so" ] || [ -f "${libdir}/libc10.dylib" ]; then | ||||
|     C10_LINK_FLAGS="-lc10" | ||||
|   fi | ||||
|   TORCH_CPU_LINK_FLAGS="" | ||||
|   if [ -f "${libdir}/libtorch_cpu.so" ] || [ -f "${libdir}/libtorch_cpu.dylib" ]; then | ||||
|     TORCH_CPU_LINK_FLAGS="-ltorch_cpu" | ||||
|   fi | ||||
|   TORCH_CUDA_LINK_FLAGS="" | ||||
|   if [ -f "${libdir}/libtorch_cuda.so" ] || [ -f "${libdir}/libtorch_cuda.dylib" ]; then | ||||
|     TORCH_CUDA_LINK_FLAGS="-ltorch_cuda" | ||||
|   # Both halves of the split CUDA library must be present. The groups are | ||||
|   # explicit because && and || have equal precedence in the shell and would | ||||
|   # otherwise be evaluated strictly left to right. | ||||
|   elif { [ -f "${libdir}/libtorch_cuda_cpp.so" ] && [ -f "${libdir}/libtorch_cuda_cu.so" ]; } || \ | ||||
|     { [ -f "${libdir}/libtorch_cuda_cpp.dylib" ] && [ -f "${libdir}/libtorch_cuda_cu.dylib" ]; }; then | ||||
|     TORCH_CUDA_LINK_FLAGS="-ltorch_cuda_cpp -ltorch_cuda_cu" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code" | ||||
| # Compile ${TEST_CODE_DIR}/$1.cpp against the installed libtorch with the | ||||
| # _GLIBCXX_USE_CXX11_ABI value implied by DESIRED_DEVTOOLSET, then run the binary. | ||||
| #   $1 - example name (the source file name without ".cpp") | ||||
| build_and_run_example_cpp () { | ||||
|   GLIBCXX_USE_CXX11_ABI=0 | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     GLIBCXX_USE_CXX11_ABI=1 | ||||
|   fi | ||||
|   setup_link_flags | ||||
|   # Expansions stay unquoted so that flag variables holding several words are | ||||
|   # split into separate compiler arguments. | ||||
|   g++ ${TEST_CODE_DIR}/$1.cpp \ | ||||
|     -I${install_root}/include \ | ||||
|     -I${install_root}/include/torch/csrc/api/include \ | ||||
|     -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI \ | ||||
|     -std=gnu++17 \ | ||||
|     -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} \ | ||||
|     -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS \ | ||||
|     -o $1 | ||||
|   ./$1 | ||||
| } | ||||
|  | ||||
| # Compile ${TEST_CODE_DIR}/$1.cpp with the opposite _GLIBCXX_USE_CXX11_ABI value | ||||
| # from the one implied by DESIRED_DEVTOOLSET and require the build to fail. | ||||
| # Exits with status 1 if the compilation unexpectedly succeeds. | ||||
| #   $1 - example name (the source file name without ".cpp") | ||||
| build_example_cpp_with_incorrect_abi () { | ||||
|   GLIBCXX_USE_CXX11_ABI=1 | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     GLIBCXX_USE_CXX11_ABI=0 | ||||
|   fi | ||||
|   # errexit is switched off so the compiler's exit status can be captured | ||||
|   # instead of aborting the script. | ||||
|   set +e | ||||
|   setup_link_flags | ||||
|   g++ ${TEST_CODE_DIR}/$1.cpp \ | ||||
|     -I${install_root}/include \ | ||||
|     -I${install_root}/include/torch/csrc/api/include \ | ||||
|     -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI \ | ||||
|     -std=gnu++17 \ | ||||
|     -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} \ | ||||
|     -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS \ | ||||
|     -o $1 | ||||
|   ERRCODE=$? | ||||
|   set -e | ||||
|   if [ "$ERRCODE" -ne "0" ]; then | ||||
|     echo "Building example with incorrect ABI throws expected error. Proceeding." | ||||
|   else | ||||
|     echo "Building example with incorrect ABI didn't throw error. Aborting." | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| ############################################################################### | ||||
| # Check simple Python/C++ calls | ||||
| ############################################################################### | ||||
| if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|   # NS: Set LD_LIBRARY_PATH for CUDA builds, but perhaps it should be removed | ||||
|   if [[ "$DESIRED_CUDA" == "cu"* ]]; then | ||||
|     export LD_LIBRARY_PATH=/usr/local/cuda/lib64 | ||||
|   fi | ||||
|   build_and_run_example_cpp simple-torch-test | ||||
|   # `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test | ||||
|   # the expected failure case for Ubuntu 16.04 + gcc 5.4 only. | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     build_example_cpp_with_incorrect_abi simple-torch-test | ||||
|   fi | ||||
| else | ||||
|   pushd /tmp | ||||
|   python -c 'import torch' | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check torch.git_version | ||||
| ############################################################################### | ||||
| if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then | ||||
|   pushd /tmp | ||||
|   python -c 'import torch; assert torch.version.git_version != "Unknown"' | ||||
|   python -c 'import torch; assert torch.version.git_version != None' | ||||
|   popd | ||||
| fi | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for MKL | ||||
| ############################################################################### | ||||
|  | ||||
| if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|   echo "Checking that MKL is available" | ||||
|   build_and_run_example_cpp check-torch-mkl | ||||
| elif [[ "$(uname -m)" != "arm64" && "$(uname -m)" != "s390x" ]]; then | ||||
|   if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then | ||||
|     if [[ "$(uname -m)" == "aarch64" ]]; then | ||||
|       echo "Checking that MKLDNN is available on aarch64" | ||||
|       pushd /tmp | ||||
|       python -c 'import torch; exit(0 if torch.backends.mkldnn.is_available() else 1)' | ||||
|       popd | ||||
|     else | ||||
|       echo "Checking that MKL is available" | ||||
|       pushd /tmp | ||||
|       python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)' | ||||
|       popd | ||||
|     fi | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for XNNPACK | ||||
| ############################################################################### | ||||
|  | ||||
| if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|   echo "Checking that XNNPACK is available" | ||||
|   build_and_run_example_cpp check-torch-xnnpack | ||||
| else | ||||
|   if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]] && [[ "$(uname -m)" != "s390x"  ]]; then | ||||
|     echo "Checking that XNNPACK is available" | ||||
|     pushd /tmp | ||||
|     python -c 'import torch.backends.xnnpack; exit(0 if torch.backends.xnnpack.enabled else 1)' | ||||
|     popd | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check CUDA configured correctly | ||||
| ############################################################################### | ||||
| # Skip these for Windows machines without GPUs | ||||
| if [[ "$OSTYPE" == "msys" ]]; then | ||||
|     GPUS=$(wmic path win32_VideoController get name) | ||||
|     if [[ ! "$GPUS" == *NVIDIA* ]]; then | ||||
|         echo "Skip CUDA tests for machines without a Nvidia GPU card" | ||||
|         exit 0 | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| # Test that CUDA builds are setup correctly | ||||
| if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* && "$(uname -m)" != "s390x" ]]; then | ||||
|   if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|     build_and_run_example_cpp check-torch-cuda | ||||
|   else | ||||
|     pushd /tmp | ||||
|     echo "Checking that CUDA archs are setup correctly" | ||||
|     timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()' | ||||
|  | ||||
|     # These have to run after CUDA is initialized | ||||
|  | ||||
|     echo "Checking that magma is available" | ||||
|     python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)' | ||||
|  | ||||
|     echo "Checking that CuDNN is available" | ||||
|     python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)' | ||||
|  | ||||
|     # Validate that the build is free of the linker regressions reported in https://github.com/pytorch/pytorch/issues/57744 | ||||
|     echo "Checking that exception handling works" | ||||
|     python -c "import torch; from unittest import TestCase;TestCase().assertRaises(RuntimeError, lambda:torch.eye(7, 7, device='cuda:7'))" | ||||
|  | ||||
|     echo "Checking that basic RNN works" | ||||
|     python ${TEST_CODE_DIR}/rnn_smoke.py | ||||
|  | ||||
|     echo "Checking that basic CNN works" | ||||
|     python "${TEST_CODE_DIR}/cnn_smoke.py" | ||||
|  | ||||
|     echo "Test that linalg works" | ||||
|     python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))" | ||||
|  | ||||
|     popd | ||||
|   fi # if libtorch | ||||
| fi # if cuda | ||||
|  | ||||
| ########################## | ||||
| # Run parts of smoke tests | ||||
| ########################## | ||||
| if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then | ||||
|   pushd "$(dirname ${BASH_SOURCE[0]})/smoke_test" | ||||
|   python -c "from smoke_test import test_linalg; test_linalg()" | ||||
|   if [[ "$DESIRED_CUDA" == *cuda* ]]; then | ||||
|     python -c "from smoke_test import test_linalg; test_linalg('cuda')" | ||||
|   fi | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check PyTorch supports TCP_TLS gloo transport | ||||
| ############################################################################### | ||||
|  | ||||
| if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" != 'libtorch' ]]; then | ||||
|   GLOO_CHECK="import torch.distributed as dist | ||||
| try: | ||||
|     dist.init_process_group('gloo', rank=0, world_size=1) | ||||
| except RuntimeError as e: | ||||
|     print(e) | ||||
| " | ||||
|   RESULT=`GLOO_DEVICE_TRANSPORT=TCP_TLS MASTER_ADDR=localhost MASTER_PORT=63945 python -c "$GLOO_CHECK"` | ||||
|   GLOO_TRANSPORT_IS_NOT_SUPPORTED='gloo transport is not supported' | ||||
|   if [[ "$RESULT" =~ "$GLOO_TRANSPORT_IS_NOT_SUPPORTED" ]]; then | ||||
|     echo "PyTorch doesn't support TLS_TCP transport, please build with USE_GLOO_WITH_OPENSSL=1" | ||||
|     exit 1 | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries | ||||
| ############################################################################### | ||||
| if [[ "$(uname)" == 'Linux' && ("$PACKAGE_TYPE" == 'conda' || "$PACKAGE_TYPE" == 'manywheel')]]; then | ||||
|   pushd /tmp | ||||
|   python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))" | ||||
|   popd | ||||
| fi | ||||
| @ -6,12 +6,6 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then | ||||
|     # Save the absolute path in case later we chdir (as occurs in the gpu perf test) | ||||
|     script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )" | ||||
|  | ||||
|     if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then | ||||
|         # This is really weird, but newer sccache somehow produces broken binary | ||||
|         # see https://github.com/pytorch/pytorch/issues/139188 | ||||
|         sudo mv /opt/cache/bin/sccache-0.2.14a /opt/cache/bin/sccache | ||||
|     fi | ||||
|  | ||||
|     if which sccache > /dev/null; then | ||||
|         # Save sccache logs to file | ||||
|         sccache --stop-server > /dev/null  2>&1 || true | ||||
|  | ||||
| @ -81,15 +81,14 @@ function pip_install_whl() { | ||||
|  | ||||
| function pip_install() { | ||||
|   # retry 3 times | ||||
|   pip_install_pkg="python3 -m pip install --progress-bar off" | ||||
|   ${pip_install_pkg} "$@" || \ | ||||
|     ${pip_install_pkg} "$@" || \ | ||||
|     ${pip_install_pkg} "$@" | ||||
|   # old versions of pip don't have the "--progress-bar" flag | ||||
|   pip install --progress-bar off "$@" || pip install --progress-bar off "$@" || pip install --progress-bar off "$@" ||\ | ||||
|   pip install "$@" || pip install "$@" || pip install "$@" | ||||
| } | ||||
|  | ||||
| function pip_uninstall() { | ||||
|   # uninstall 2 times | ||||
|   pip3 uninstall -y "$@" || pip3 uninstall -y "$@" | ||||
|   pip uninstall -y "$@" || pip uninstall -y "$@" | ||||
| } | ||||
|  | ||||
| function get_exit_code() { | ||||
| @ -105,12 +104,32 @@ function get_bazel() { | ||||
|   # version of Bazelisk to fetch the platform specific version of | ||||
|   # Bazel to use from .bazelversion. | ||||
|   retry curl --location --output tools/bazel \ | ||||
|     https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.23.0/bazelisk.py | ||||
|     https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py | ||||
|   shasum --algorithm=1 --check \ | ||||
|     <(echo '01df9cf7f08dd80d83979ed0d0666a99349ae93c  tools/bazel') | ||||
|     <(echo 'd4369c3d293814d3188019c9f7527a948972d9f8  tools/bazel') | ||||
|   chmod u+x tools/bazel | ||||
| } | ||||
|  | ||||
| # This function is bazel-specific because of a bug | ||||
| # in bazel that requires some special path massaging | ||||
| # as a workaround. See | ||||
| # https://github.com/bazelbuild/bazel/issues/10167 | ||||
| function install_sccache_nvcc_for_bazel() { | ||||
|   sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real | ||||
|  | ||||
|   # Write the `/usr/local/cuda/bin/nvcc` | ||||
|   cat << EOF | sudo tee /usr/local/cuda/bin/nvcc | ||||
| #!/bin/sh | ||||
| if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then | ||||
|   exec sccache /usr/local/cuda/bin/nvcc "\$@" | ||||
| else | ||||
|   exec external/local_cuda/cuda/bin/nvcc-real "\$@" | ||||
| fi | ||||
| EOF | ||||
|  | ||||
|   sudo chmod +x /usr/local/cuda/bin/nvcc | ||||
| } | ||||
|  | ||||
| function install_monkeytype { | ||||
|   # Install MonkeyType | ||||
|   pip_install MonkeyType | ||||
| @ -172,27 +191,14 @@ function install_torchrec_and_fbgemm() { | ||||
|   pip_uninstall torchrec-nightly | ||||
|   pip_uninstall fbgemm-gpu-nightly | ||||
|   pip_install setuptools-git-versioning scikit-build pyre-extensions | ||||
|  | ||||
|   # TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it | ||||
|   # seems to be an sccache-related issue | ||||
|   if [[ "$IS_A100_RUNNER" == "1" ]]; then | ||||
|     unset CMAKE_CUDA_COMPILER_LAUNCHER | ||||
|     sudo mv /opt/cache/bin /opt/cache/bin-backup | ||||
|   fi | ||||
|  | ||||
|   # See https://github.com/pytorch/pytorch/issues/106971 | ||||
|   CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu" | ||||
|   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}" | ||||
|  | ||||
|   if [[ "$IS_A100_RUNNER" == "1" ]]; then | ||||
|     export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache | ||||
|     sudo mv /opt/cache/bin-backup /opt/cache/bin | ||||
|   fi | ||||
| } | ||||
|  | ||||
| function clone_pytorch_xla() { | ||||
|   if [[ ! -d ./xla ]]; then | ||||
|     git clone --recursive -b r2.6 https://github.com/pytorch/xla.git | ||||
|     git clone --recursive --quiet https://github.com/pytorch/xla.git | ||||
|     pushd xla | ||||
|     # pin the xla hash so that we don't get broken by changes to xla | ||||
|     git checkout "$(cat ../.github/ci_commit_pins/xla.txt)" | ||||
| @ -221,12 +227,6 @@ function checkout_install_torchbench() { | ||||
|   popd | ||||
| } | ||||
|  | ||||
| function install_torchao() { | ||||
|   local commit | ||||
|   commit=$(get_pinned_commit torchao) | ||||
|   pip_install --no-use-pep517 --user "git+https://github.com/pytorch/ao.git@${commit}" | ||||
| } | ||||
|  | ||||
| function print_sccache_stats() { | ||||
|   echo 'PyTorch Build Statistics' | ||||
|   sccache --show-stats | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| from datetime import datetime, timedelta, timezone | ||||
| from datetime import datetime, timedelta | ||||
| from tempfile import mkdtemp | ||||
|  | ||||
| from cryptography import x509 | ||||
| @ -42,10 +42,11 @@ def create_cert(path, C, ST, L, O, key): | ||||
|         .issuer_name(issuer) | ||||
|         .public_key(key.public_key()) | ||||
|         .serial_number(x509.random_serial_number()) | ||||
|         .not_valid_before(datetime.now(timezone.utc)) | ||||
|         .not_valid_before(datetime.utcnow()) | ||||
|         .not_valid_after( | ||||
|             # Our certificate will be valid for 10 days | ||||
|             datetime.now(timezone.utc) + timedelta(days=10) | ||||
|             datetime.utcnow() | ||||
|             + timedelta(days=10) | ||||
|         ) | ||||
|         .add_extension( | ||||
|             x509.BasicConstraints(ca=True, path_length=None), | ||||
| @ -87,10 +88,11 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key): | ||||
|         .issuer_name(ca_cert.subject) | ||||
|         .public_key(csr_cert.public_key()) | ||||
|         .serial_number(x509.random_serial_number()) | ||||
|         .not_valid_before(datetime.now(timezone.utc)) | ||||
|         .not_valid_before(datetime.utcnow()) | ||||
|         .not_valid_after( | ||||
|             # Our certificate will be valid for 10 days | ||||
|             datetime.now(timezone.utc) + timedelta(days=10) | ||||
|             datetime.utcnow() | ||||
|             + timedelta(days=10) | ||||
|             # Sign our certificate with our private key | ||||
|         ) | ||||
|         .sign(private_ca_key, hashes.SHA256()) | ||||
|  | ||||
| @ -1,5 +1,4 @@ | ||||
| #!/bin/bash | ||||
| set -x | ||||
|  | ||||
| # shellcheck disable=SC2034 | ||||
| # shellcheck source=./macos-common.sh | ||||
| @ -10,13 +9,15 @@ if [[ -n "$CONDA_ENV" ]]; then | ||||
|   export PATH="$CONDA_ENV/bin":$PATH | ||||
| fi | ||||
|  | ||||
| # Test that OpenMP is enabled | ||||
| pushd test | ||||
| if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then | ||||
|   echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False" | ||||
|   exit 1 | ||||
| # Test that OpenMP is enabled for non-arm64 build | ||||
| if [[ ${BUILD_ENVIRONMENT} != *arm64* ]]; then | ||||
|   pushd test | ||||
|   if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available()))") == "1" ]]; then | ||||
|     echo "Build should have OpenMP enabled, but torch.backends.openmp.is_available() is False" | ||||
|     exit 1 | ||||
|   fi | ||||
|   popd | ||||
| fi | ||||
| popd | ||||
|  | ||||
| setup_test_python() { | ||||
|   # The CircleCI worker hostname doesn't resolve to an address. | ||||
| @ -26,9 +27,8 @@ setup_test_python() { | ||||
|   echo "Ninja version: $(ninja --version)" | ||||
|   echo "Python version: $(which python) ($(python --version))" | ||||
|  | ||||
|   # Set the limit on open file handles to 16384 | ||||
|   # might help with intermittent compiler test failures | ||||
|   ulimit -n 16384 | ||||
|   # Increase default limit on open file handles from 256 to 1024 | ||||
|   ulimit -n 1024 | ||||
| } | ||||
|  | ||||
| test_python_all() { | ||||
| @ -149,146 +149,9 @@ test_jit_hooks() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| torchbench_setup_macos() { | ||||
|   git clone --recursive https://github.com/pytorch/vision torchvision | ||||
|   git clone --recursive https://github.com/pytorch/audio torchaudio | ||||
|  | ||||
|   pushd torchvision | ||||
|   git fetch | ||||
|   git checkout "$(cat ../.github/ci_commit_pins/vision.txt)" | ||||
|   git submodule update --init --recursive | ||||
|   python setup.py clean | ||||
|   python setup.py develop | ||||
|   popd | ||||
|  | ||||
|   pushd torchaudio | ||||
|   git fetch | ||||
|   git checkout "$(cat ../.github/ci_commit_pins/audio.txt)" | ||||
|   git submodule update --init --recursive | ||||
|   python setup.py clean | ||||
|   python setup.py develop | ||||
|   popd | ||||
|  | ||||
|   # Shellcheck doesn't like it when you pass no arguments to a function that can take args. See https://www.shellcheck.net/wiki/SC2120 | ||||
|   # shellcheck disable=SC2119,SC2120 | ||||
|   checkout_install_torchbench | ||||
| } | ||||
|  | ||||
| conda_benchmark_deps() { | ||||
|   conda install -y astunparse numpy scipy ninja pyyaml setuptools cmake typing-extensions requests protobuf numba cython scikit-learn | ||||
|   conda install -y -c conda-forge librosa | ||||
| } | ||||
|  | ||||
|  | ||||
| test_torchbench_perf() { | ||||
|   print_cmake_info | ||||
|  | ||||
|   echo "Launching torchbench setup" | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   local backend=eager | ||||
|   local dtype=notset | ||||
|   local device=mps | ||||
|  | ||||
|   echo "Setup complete, launching torchbench training performance run" | ||||
|   PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|     --performance --backend "$backend" --training --devices "$device" \ | ||||
|     --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|  | ||||
|   echo "Launching torchbench inference performance run" | ||||
|   PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|     --performance --backend "$backend" --inference --devices "$device" \ | ||||
|     --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|  | ||||
|   echo "Pytorch benchmark on mps device completed" | ||||
| } | ||||
|  | ||||
| test_torchbench_smoketest() { | ||||
|   print_cmake_info | ||||
|  | ||||
|   echo "Launching torchbench setup" | ||||
|   conda_benchmark_deps | ||||
|   # shellcheck disable=SC2119,SC2120 | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   local backend=eager | ||||
|   local dtype=notset | ||||
|   local device=mps | ||||
|  | ||||
|   touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|   touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|  | ||||
|   echo "Setup complete, launching torchbench training performance run" | ||||
|   for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --performance --only "$model" --backend "$backend" --training --devices "$device" \ | ||||
|       --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|   done | ||||
|  | ||||
|   echo "Launching torchbench inference performance run" | ||||
|   for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --performance --only "$model" --backend "$backend" --inference --devices "$device" \ | ||||
|       --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|   done | ||||
|  | ||||
|   echo "Pytorch benchmark on mps device completed" | ||||
| } | ||||
|  | ||||
| test_hf_perf() { | ||||
|   print_cmake_info | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   echo "Launching HuggingFace training perf run" | ||||
|   python "$(pwd)"/benchmarks/dynamo/huggingface.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/hf_training.csv | ||||
|  | ||||
|   echo "Launching HuggingFace inference perf run" | ||||
|   python "$(pwd)"/benchmarks/dynamo/huggingface.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/hf_inference.csv | ||||
|  | ||||
|   echo "HuggingFace benchmark on mps device completed" | ||||
| } | ||||
|  | ||||
| test_timm_perf() { | ||||
|   print_cmake_info | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   echo "Launching timm training perf run" | ||||
|   python "$(pwd)"/benchmarks/dynamo/timm_models.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/timm_training.csv | ||||
|  | ||||
|   echo "Launching timm inference perf run" | ||||
|   python "$(pwd)"/benchmarks/dynamo/timm_models.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/timm_inference.csv | ||||
|  | ||||
|   echo "timm benchmark on mps device completed" | ||||
| } | ||||
|  | ||||
| install_tlparse | ||||
|  | ||||
| if [[ $TEST_CONFIG == *"perf_all"* ]]; then | ||||
|   test_torchbench_perf | ||||
|   test_hf_perf | ||||
|   test_timm_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_torchbench"* ]]; then | ||||
|   test_torchbench_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_hf"* ]]; then | ||||
|   test_hf_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then | ||||
|   test_timm_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then | ||||
|   test_torchbench_smoketest | ||||
| elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then | ||||
| if [[ $NUM_TEST_SHARDS -gt 1 ]]; then | ||||
|   test_python_shard "${SHARD_NUMBER}" | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|     test_libtorch | ||||
|  | ||||
| @ -1,436 +0,0 @@ | ||||
| #!/bin/bash | ||||
| # shellcheck disable=SC2086,SC2048,SC2068,SC2145,SC2034,SC2207,SC2143 | ||||
| # TODO: Re-enable shellchecks above | ||||
|  | ||||
| set -eux -o pipefail | ||||
|  | ||||
| # Essentially runs pytorch/test/run_test.py, but keeps track of which tests to | ||||
| # skip in a centralized place. | ||||
| # | ||||
| # TODO Except for a few tests, this entire file is a giant TODO. Why are these | ||||
| # tests failing? | ||||
| # TODO deal with Windows | ||||
|  | ||||
| # This script expects to be in the pytorch root folder | ||||
| if [[ ! -d 'test' || ! -f 'test/run_test.py' ]]; then | ||||
|     echo "builder/test.sh expects to be run from the Pytorch root directory " \ | ||||
|          "but I'm actually in $(pwd)" | ||||
|     exit 2 | ||||
| fi | ||||
|  | ||||
| # Allow master skip of all tests | ||||
| if [[ -n "${SKIP_ALL_TESTS:-}" ]]; then | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # If given specific test params then just run those | ||||
| if [[ -n "${RUN_TEST_PARAMS:-}" ]]; then | ||||
|     echo "$(date) :: Calling user-command $(pwd)/test/run_test.py ${RUN_TEST_PARAMS[@]}" | ||||
|     python test/run_test.py ${RUN_TEST_PARAMS[@]} | ||||
|     exit 0 | ||||
| fi | ||||
|  | ||||
| # Function to retry functions that sometimes timeout or have flaky failures | ||||
| retry () { | ||||
|     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) | ||||
| } | ||||
|  | ||||
| # Parameters | ||||
| ############################################################################## | ||||
| if [[ "$#" != 3 ]]; then | ||||
|   if [[ -z "${DESIRED_PYTHON:-}" || -z "${DESIRED_CUDA:-}" || -z "${PACKAGE_TYPE:-}" ]]; then | ||||
|     echo "USAGE: run_tests.sh  PACKAGE_TYPE  DESIRED_PYTHON  DESIRED_CUDA" | ||||
|     echo "The env variable PACKAGE_TYPE must be set to 'conda' or 'manywheel' or 'libtorch'" | ||||
|     echo "The env variable DESIRED_PYTHON must be set like '2.7mu' or '3.6m' etc" | ||||
|     echo "The env variable DESIRED_CUDA must be set like 'cpu' or 'cu80' etc" | ||||
|     exit 1 | ||||
|   fi | ||||
|   package_type="$PACKAGE_TYPE" | ||||
|   py_ver="$DESIRED_PYTHON" | ||||
|   cuda_ver="$DESIRED_CUDA" | ||||
| else | ||||
|   package_type="$1" | ||||
|   py_ver="$2" | ||||
|   cuda_ver="$3" | ||||
| fi | ||||
|  | ||||
| if [[ "$cuda_ver" == 'cpu-cxx11-abi' ]]; then | ||||
|     cuda_ver="cpu" | ||||
| fi | ||||
|  | ||||
| # cu80, cu90, cu100, cpu | ||||
| if [[ ${#cuda_ver} -eq 4 ]]; then | ||||
|     cuda_ver_majmin="${cuda_ver:2:1}.${cuda_ver:3:1}" | ||||
| elif [[ ${#cuda_ver} -eq 5 ]]; then | ||||
|     cuda_ver_majmin="${cuda_ver:2:2}.${cuda_ver:4:1}" | ||||
| fi | ||||
|  | ||||
| NUMPY_PACKAGE="" | ||||
| if [[ ${py_ver} == "3.10" ]]; then | ||||
|     PROTOBUF_PACKAGE="protobuf>=3.17.2" | ||||
|     NUMPY_PACKAGE="numpy>=1.21.2" | ||||
| else | ||||
|     PROTOBUF_PACKAGE="protobuf=3.14.0" | ||||
| fi | ||||
|  | ||||
| # Environment initialization | ||||
| if [[ "$(uname)" == Darwin ]]; then | ||||
|     # Install the testing dependencies | ||||
|     retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml | ||||
| else | ||||
|     retry pip install -qr requirements.txt || true | ||||
|     retry pip install -q hypothesis protobuf pytest setuptools || true | ||||
|     numpy_ver=1.15 | ||||
|     case "$(python --version 2>&1)" in | ||||
|       *2* | *3.5* | *3.6*) | ||||
|         numpy_ver=1.11 | ||||
|         ;; | ||||
|     esac | ||||
|     retry pip install -q "numpy==${numpy_ver}" || true | ||||
| fi | ||||
|  | ||||
| echo "Testing with:" | ||||
| pip freeze | ||||
| conda list || true | ||||
|  | ||||
| ############################################################################## | ||||
| # Smoke tests | ||||
| ############################################################################## | ||||
| # TODO use check_binary.sh, which requires making sure it runs on Windows | ||||
| pushd / | ||||
| echo "Smoke testing imports" | ||||
| python -c 'import torch' | ||||
|  | ||||
| # Test that MKL is there | ||||
| if [[ "$(uname)" == 'Darwin' && "$package_type" == *wheel ]]; then | ||||
|     echo 'Not checking for MKL on Darwin wheel packages' | ||||
| else | ||||
|     echo "Checking that MKL is available" | ||||
|     python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)' | ||||
| fi | ||||
|  | ||||
| if [[ "$OSTYPE" == "msys" ]]; then | ||||
|     GPUS=$(wmic path win32_VideoController get name) | ||||
|     if [[ ! "$GPUS" == *NVIDIA* ]]; then | ||||
|         echo "Skip CUDA tests for machines without a Nvidia GPU card" | ||||
|         exit 0 | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| # Test that the version number is consistent during building and testing | ||||
| if [[ "$PYTORCH_BUILD_NUMBER" -gt 1 ]]; then | ||||
|     expected_version="${PYTORCH_BUILD_VERSION}.post${PYTORCH_BUILD_NUMBER}" | ||||
| else | ||||
|     expected_version="${PYTORCH_BUILD_VERSION}" | ||||
| fi | ||||
| echo "Checking that we are testing the package that is just built" | ||||
| python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else 1)" | ||||
|  | ||||
| # Test that CUDA builds are setup correctly | ||||
| if [[ "$cuda_ver" != 'cpu' ]]; then | ||||
|     cuda_installed=1 | ||||
|     nvidia-smi || cuda_installed=0 | ||||
|     if [[ "$cuda_installed" == 0 ]]; then | ||||
|       echo "Skip CUDA tests for machines without a Nvidia GPU card" | ||||
|     else | ||||
|       # Test CUDA archs | ||||
|       echo "Checking that CUDA archs are setup correctly" | ||||
|       timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()' | ||||
|  | ||||
|       # These have to run after CUDA is initialized | ||||
|       echo "Checking that magma is available" | ||||
|       python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)' | ||||
|       echo "Checking that CuDNN is available" | ||||
|       python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)' | ||||
|     fi | ||||
| fi | ||||
|  | ||||
| # Check that OpenBlas is not linked to on MacOS | ||||
| if [[ "$(uname)" == 'Darwin' ]]; then | ||||
|     echo "Checking the OpenBLAS is not linked to" | ||||
|     all_dylibs=($(find "$(python -c "import site; print(site.getsitepackages()[0])")"/torch -name '*.dylib')) | ||||
|     for dylib in "${all_dylibs[@]}"; do | ||||
|         if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then | ||||
|             echo "Found openblas as a dependency of $dylib" | ||||
|             echo "Full dependencies is: $(otool -L $dylib)" | ||||
|             exit 1 | ||||
|         fi | ||||
|     done | ||||
|  | ||||
|     echo "Checking that OpenMP is available" | ||||
|     python -c "import torch; exit(0 if torch.backends.openmp.is_available() else 1)" | ||||
| fi | ||||
|  | ||||
| popd | ||||
|  | ||||
| # TODO re-enable the other tests after the nightlies are moved to CI. This is | ||||
| # because the binaries keep breaking, often from additional tests, that aren't | ||||
| # real problems. Once these are on circleci and a smoke-binary-build is added | ||||
| # to PRs then this should stop happening and these can be re-enabled. | ||||
| echo "Not running unit tests. Hopefully these problems are caught by CI" | ||||
| exit 0 | ||||
|  | ||||
|  | ||||
| ############################################################################## | ||||
| # Running unit tests (except not right now) | ||||
| ############################################################################## | ||||
| echo "$(date) :: Starting tests for $package_type package for python$py_ver and $cuda_ver" | ||||
|  | ||||
| # We keep track of exact tests to skip, as otherwise we would be hardly running | ||||
| # any tests. But b/c of issues working with pytest/normal-python-test/ and b/c | ||||
| # of special snowflake tests in test/run_test.py we also take special care of | ||||
| # those | ||||
| tests_to_skip=() | ||||
|  | ||||
| # | ||||
| # Entire file exclusions | ||||
| ############################################################################## | ||||
| entire_file_exclusions=("-x") | ||||
|  | ||||
| # cpp_extensions doesn't work with pytest, so we exclude it from the pytest run | ||||
| # here and then manually run it later. Note that this is only because this | ||||
| # entire_file_exclusions flag is only passed to the pytest run | ||||
| entire_file_exclusions+=("cpp_extensions") | ||||
|  | ||||
| # TODO temporary line to fix the next day's nightlies, but should be removed when | ||||
| # issue is fixed | ||||
| entire_file_exclusions+=('type_info') | ||||
|  | ||||
| if [[ "$cuda_ver" == 'cpu' ]]; then | ||||
|     # test/test_cuda.py exits early if the installed torch is not built with | ||||
|     # CUDA, but the exit doesn't work when running with pytest, so pytest will | ||||
|     # still try to run all the CUDA tests and then fail | ||||
|     entire_file_exclusions+=("cuda") | ||||
|     entire_file_exclusions+=("nccl") | ||||
| fi | ||||
|  | ||||
| if [[ "$(uname)" == 'Darwin' || "$OSTYPE" == "msys" ]]; then | ||||
|     # pytest on Mac doesn't like the exits in these files | ||||
|     entire_file_exclusions+=('c10d') | ||||
|     entire_file_exclusions+=('distributed') | ||||
|  | ||||
|     # pytest doesn't mind the exit but fails the tests. On Mac we run this | ||||
|     # later without pytest | ||||
|     entire_file_exclusions+=('thd_distributed') | ||||
| fi | ||||
|  | ||||
|  | ||||
| # | ||||
| # Universal flaky tests | ||||
| ############################################################################## | ||||
|  | ||||
| # RendezvousEnvTest sometimes hangs forever | ||||
| # Otherwise it will fail on CUDA with | ||||
| #   Traceback (most recent call last): | ||||
| #     File "test_c10d.py", line 179, in test_common_errors | ||||
| #       next(gen) | ||||
| #   AssertionError: ValueError not raised | ||||
| tests_to_skip+=('RendezvousEnvTest and test_common_errors') | ||||
|  | ||||
| # This hung forever once on conda_3.5_cu92 | ||||
| tests_to_skip+=('TestTorch and test_sum_dim') | ||||
|  | ||||
| # test_trace_warn isn't actually flaky, but it doesn't work with pytest so we | ||||
| # just skip it | ||||
| tests_to_skip+=('TestJit and test_trace_warn') | ||||
| # | ||||
| # Python specific flaky tests | ||||
| ############################################################################## | ||||
|  | ||||
| # test_dataloader.py:721: AssertionError | ||||
| # looks like a timeout, but interestingly only appears on python 3 | ||||
| if [[ "$py_ver" == 3* ]]; then | ||||
|     tests_to_skip+=('TestDataLoader and test_proper_exit') | ||||
| fi | ||||
|  | ||||
| # | ||||
| # CUDA flaky tests, all package types | ||||
| ############################################################################## | ||||
| if [[ "$cuda_ver" != 'cpu' ]]; then | ||||
|  | ||||
|     # | ||||
|     # DistributedDataParallelTest | ||||
|     # All of these seem to fail | ||||
|     tests_to_skip+=('DistributedDataParallelTest') | ||||
|  | ||||
|     # | ||||
|     # RendezvousEnvTest | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test_c10d.py", line 201, in test_nominal | ||||
|     #     store0, rank0, size0 = next(gen0) | ||||
|     #   File "/opt/python/cp36-cp36m/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 131, in _env_rendezvous_handler | ||||
|     #     store = TCPStore(master_addr, master_port, start_daemon) | ||||
|     # RuntimeError: Address already in use | ||||
|     tests_to_skip+=('RendezvousEnvTest and test_nominal') | ||||
|  | ||||
|     # | ||||
|     # TestCppExtension | ||||
|     # | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test_cpp_extensions.py", line 134, in test_jit_cudnn_extension | ||||
|     #     with_cuda=True) | ||||
|     #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 552, in load | ||||
|     #     with_cuda) | ||||
|     #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 729, in _jit_compile | ||||
|     #     return _import_module_from_library(name, build_directory) | ||||
|     #   File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 867, in _import_module_from_library | ||||
|     #     return imp.load_module(module_name, file, path, description) | ||||
|     #   File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 243, in load_module | ||||
|     #     return load_dynamic(name, filename, file) | ||||
|     #   File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 343, in load_dynamic | ||||
|     #     return _load(spec) | ||||
|     #   File "<frozen importlib._bootstrap>", line 693, in _load | ||||
|     #   File "<frozen importlib._bootstrap>", line 666, in _load_unlocked | ||||
|     #   File "<frozen importlib._bootstrap>", line 577, in module_from_spec | ||||
|     #   File "<frozen importlib._bootstrap_external>", line 938, in create_module | ||||
|     #   File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed | ||||
|     # ImportError: libcudnn.so.7: cannot open shared object file: No such file or directory | ||||
|     tests_to_skip+=('TestCppExtension and test_jit_cudnn_extension') | ||||
|  | ||||
|     # | ||||
|     # TestCuda | ||||
|     # | ||||
|  | ||||
|     # 3.7_cu80 | ||||
|     #  RuntimeError: CUDA error: out of memory | ||||
|     tests_to_skip+=('TestCuda and test_arithmetic_large_tensor') | ||||
|  | ||||
|     # 3.7_cu80 | ||||
|     # RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch-nightly_1538097262541/work/aten/src/THC/THCTensorCopy.cu:205 | ||||
|     tests_to_skip+=('TestCuda and test_autogpu') | ||||
|  | ||||
|     # | ||||
|     # TestDistBackend | ||||
|     # | ||||
|  | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test_thd_distributed.py", line 1046, in wrapper | ||||
|     #     self._join_and_reduce(fn) | ||||
|     #   File "test_thd_distributed.py", line 1108, in _join_and_reduce | ||||
|     #     self.assertEqual(p.exitcode, first_process.exitcode) | ||||
|     #   File "/pytorch/test/common.py", line 399, in assertEqual | ||||
|     #     super(TestCase, self).assertEqual(x, y, message) | ||||
|     # AssertionError: None != 77 : | ||||
|     tests_to_skip+=('TestDistBackend and test_all_gather_group') | ||||
|     tests_to_skip+=('TestDistBackend and test_all_reduce_group_max') | ||||
|     tests_to_skip+=('TestDistBackend and test_all_reduce_group_min') | ||||
|     tests_to_skip+=('TestDistBackend and test_all_reduce_group_sum') | ||||
|     tests_to_skip+=('TestDistBackend and test_all_reduce_group_product') | ||||
|     tests_to_skip+=('TestDistBackend and test_barrier_group') | ||||
|     tests_to_skip+=('TestDistBackend and test_broadcast_group') | ||||
|  | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test_thd_distributed.py", line 1046, in wrapper | ||||
|     #     self._join_and_reduce(fn) | ||||
|     #   File "test_thd_distributed.py", line 1108, in _join_and_reduce | ||||
|     #     self.assertEqual(p.exitcode, first_process.exitcode) | ||||
|     #   File "/pytorch/test/common.py", line 397, in assertEqual | ||||
|     #     super(TestCase, self).assertLessEqual(abs(x - y), prec, message) | ||||
|     # AssertionError: 12 not less than or equal to 1e-05 | ||||
|     tests_to_skip+=('TestDistBackend and test_barrier') | ||||
|  | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test_distributed.py", line 1267, in wrapper | ||||
|     #     self._join_and_reduce(fn) | ||||
|     #   File "test_distributed.py", line 1350, in _join_and_reduce | ||||
|     #     self.assertEqual(p.exitcode, first_process.exitcode) | ||||
|     #   File "/pytorch/test/common.py", line 399, in assertEqual | ||||
|     #     super(TestCase, self).assertEqual(x, y, message) | ||||
|     # AssertionError: None != 1 | ||||
|     tests_to_skip+=('TestDistBackend and test_broadcast') | ||||
|  | ||||
|     # Memory leak very similar to all the conda ones below, but appears on manywheel | ||||
|     # 3.6m_cu80 | ||||
|     # AssertionError: 1605632 not less than or equal to 1e-05 : __main__.TestEndToEndHybridFrontendModels.test_vae_cuda leaked 1605632 bytes CUDA memory on device 0 | ||||
|     tests_to_skip+=('TestEndToEndHybridFrontendModels and test_vae_cuda') | ||||
|  | ||||
|     # ________________________ TestNN.test_embedding_bag_cuda ________________________ | ||||
|     # | ||||
|     # self = <test_nn.TestNN testMethod=test_embedding_bag_cuda> | ||||
|     # dtype = torch.float32 | ||||
|     # | ||||
|     #     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     #     @repeat_test_for_types(ALL_TENSORTYPES) | ||||
|     #     @skipIfRocm | ||||
|     #     def test_embedding_bag_cuda(self, dtype=torch.float): | ||||
|     #         self._test_EmbeddingBag(True, 'sum', False, dtype) | ||||
|     #         self._test_EmbeddingBag(True, 'mean', False, dtype) | ||||
|     #         self._test_EmbeddingBag(True, 'max', False, dtype) | ||||
|     #         if dtype != torch.half: | ||||
|     #             # torch.cuda.sparse.HalfTensor is not enabled. | ||||
|     #             self._test_EmbeddingBag(True, 'sum', True, dtype) | ||||
|     # >           self._test_EmbeddingBag(True, 'mean', True, dtype) | ||||
|     # | ||||
|     # test_nn.py:2144: | ||||
|     # _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ | ||||
|     # test_nn.py:2062: in _test_EmbeddingBag | ||||
|     #     _test_vs_Embedding(N, D, B, L) | ||||
|     # test_nn.py:2059: in _test_vs_Embedding | ||||
|     #     self.assertEqual(es_weight_grad, e.weight.grad, needed_prec) | ||||
|     # common.py:373: in assertEqual | ||||
|     #     assertTensorsEqual(x, y) | ||||
|     # common.py:365: in assertTensorsEqual | ||||
|     #     self.assertLessEqual(max_err, prec, message) | ||||
|     # E   AssertionError: tensor(0.0000, device='cuda:0', dtype=torch.float32) not less than or equal to 2e-05 : | ||||
|     #  1 failed, 1202 passed, 19 skipped, 2 xfailed, 796 warnings in 1166.73 seconds = | ||||
|     # Traceback (most recent call last): | ||||
|     #   File "test/run_test.py", line 391, in <module> | ||||
|     #     main() | ||||
|     #   File "test/run_test.py", line 383, in main | ||||
|     #     raise RuntimeError(message) | ||||
|     tests_to_skip+=('TestNN and test_embedding_bag_cuda') | ||||
| fi | ||||
|  | ||||
| ############################################################################## | ||||
| # MacOS specific flaky tests | ||||
| ############################################################################## | ||||
|  | ||||
# The single macOS builder shares one /tmp across jobs (the Linux builds each
# run inside docker and therefore get a private /tmp). TestCppExtension uses a
# temp folder under /tmp by default, so the whole class is skipped on macOS.
case "$(uname)" in
    Darwin)
        tests_to_skip+=('TestCppExtension')
        ;;
esac
|  | ||||
# Fold the skip list into a single pytest -k expression of the form
# "not (A) and not (B) and ...".
excluded_tests_logic=''
for exclusion in "${tests_to_skip[@]}"; do
    # ${var:+...} expands to the "<previous> and " prefix only once the
    # expression is non-empty, i.e. for every entry except the first.
    excluded_tests_logic="${excluded_tests_logic:+$excluded_tests_logic and }not ($exclusion)"
done
|  | ||||
|  | ||||
| ############################################################################## | ||||
| # Run the tests | ||||
| ############################################################################## | ||||
# Announce the main test run with a timestamp so the log shows when it began.
echo
echo "$(date) :: Calling 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"

# Run the suite through pytest. ${entire_file_exclusions[@]} is intentionally
# left unquoted here, so each element is subject to word splitting.
# The -k expression is passed as three separate arguments: a lone single
# quote, the expression itself, and another lone single quote.
# NOTE(review): presumably run_test.py re-joins its trailing arguments into a
# shell-style string so that the quotes end up wrapping the expression --
# confirm against run_test.py before simplifying this to -k "$excluded_tests_logic".
python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k "'" "$excluded_tests_logic" "'"

# Matching timestamped marker for the end of the run.
echo
echo "$(date) :: Finished 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
|  | ||||
# cpp_extensions don't work with pytest, so we run them without pytest here,
# except there's a failure on CUDA builds (documented above), and
# cpp_extensions doesn't work on a shared mac machine (also documented above).
# Net effect of the condition below: this only runs for CPU builds ($cuda_ver
# equal to 'cpu') on non-macOS hosts.
if [[ "$cuda_ver" == 'cpu' && "$(uname)" != 'Darwin' ]]; then
    echo
    echo "$(date) :: Calling 'python test/run_test.py -v -i cpp_extensions'"
    # -i restricts run_test.py to the named test module.
    python test/run_test.py -v -i cpp_extensions
    echo
    echo "$(date) :: Finished 'python test/run_test.py -v -i cpp_extensions'"
fi
|  | ||||
# thd_distributed can run on Mac but not in pytest, so on macOS it is run
# directly through run_test.py (without the -p pytest flag used above).
if [[ "$(uname)" == 'Darwin' ]]; then
    echo
    echo "$(date) :: Calling 'python test/run_test.py -v -i thd_distributed'"
    # -i restricts run_test.py to the named test module.
    python test/run_test.py -v -i thd_distributed
    echo
    echo "$(date) :: Finished 'python test/run_test.py -v -i thd_distributed'"
fi
| @ -1,130 +0,0 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import concurrent.futures | ||||
| import distutils.sysconfig | ||||
| import functools | ||||
| import itertools | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from typing import Any, List, Tuple | ||||
|  | ||||
|  | ||||
# ABI fingerprints that are searched for in libtorch's demangled symbol names.
#
# Pre-cxx11 spellings: a library built with the cxx11 ABI is expected to
# contain none of these.
PRE_CXX11_SYMBOLS = ("std::basic_string<", "std::list")
# cxx11 spellings: a library built with the pre-cxx11 ABI is expected to
# contain none of these.
CXX11_SYMBOLS = ("std::__cxx11::basic_string", "std::__cxx11::list")
# NOTE: Checking the above symbols in all namespaces doesn't work, because
# devtoolset7 always produces some cxx11 symbols even if we build with old ABI,
# and CuDNN always has pre-cxx11 symbols even if we build with new ABI using
# gcc 5.4. The check is therefore restricted to symbols that start with one of
# the following namespaces:
LIBTORCH_NAMESPACE_LIST = ("c10::", "at::", "caffe2::", "torch::")


def _apply_libtorch_symbols(symbols):
    """Compile one ``<namespace>.*<symbol>`` regex per namespace/symbol pair.

    The result is ordered namespace-major: every pattern for the first entry
    of ``LIBTORCH_NAMESPACE_LIST`` comes first, in the order of ``symbols``.
    """
    return [
        re.compile("{}.*{}".format(namespace, symbol))
        for namespace, symbol in itertools.product(LIBTORCH_NAMESPACE_LIST, symbols)
    ]


LIBTORCH_CXX11_PATTERNS = _apply_libtorch_symbols(CXX11_SYMBOLS)

LIBTORCH_PRE_CXX11_PATTERNS = _apply_libtorch_symbols(PRE_CXX11_SYMBOLS)
|  | ||||
|  | ||||
@functools.lru_cache(100)
def get_symbols(lib: str) -> List[Tuple[str, str, str]]:
    """Return the demangled symbol table of ``lib`` as reported by ``nm``.

    Runs the equivalent of ``nm <lib> | c++filt`` and splits every output line
    on its first two spaces, giving ``[address, type, name]`` entries. Results
    are memoized per ``lib`` because the callers scan the same library several
    times.

    Raises:
        subprocess.CalledProcessError: if ``nm`` or ``c++filt`` exits with a
            non-zero status.
    """
    from subprocess import PIPE, CalledProcessError, Popen, check_output

    # Build the pipeline from argument lists instead of a shell string so a
    # path containing quotes, spaces or `$` cannot change the command.
    nm_cmd = ["nm", str(lib)]
    with Popen(nm_cmd, stdout=PIPE) as nm_proc:
        demangled = check_output(["c++filt"], stdin=nm_proc.stdout)
    # A shell pipeline only reports the status of its last command; check nm
    # explicitly so a failed nm is not mistaken for "library has no symbols".
    if nm_proc.returncode != 0:
        raise CalledProcessError(nm_proc.returncode, nm_cmd)
    # Split on "\n" only: after latin1 decoding, str.splitlines() would also
    # break on other characters it treats as line boundaries.
    return [x.split(" ", 2) for x in demangled.decode("latin1").split("\n")[:-1]]
|  | ||||
|  | ||||
def grep_symbols(lib: str, patterns: List[Any]) -> List[str]:
    """Return the symbol names of ``lib`` that match any of ``patterns``.

    Args:
        lib: path of the library whose symbols are scanned (via ``get_symbols``).
        patterns: compiled regular expressions; each is applied with ``match``,
            i.e. anchored at the start of the demangled symbol name.

    Returns:
        Matching symbol names in symbol-table order. Each symbol entry
        contributes at most one result, even when several patterns match it.
    """

    def _grep_symbols(
        symbols: List[Tuple[str, str, str]], patterns: List[Any]
    ) -> List[str]:
        # `any` stops at the first matching pattern, so a symbol satisfying
        # several patterns is reported once instead of once per pattern.
        return [
            s_name
            for _s_addr, _s_type, s_name in symbols
            if any(pattern.match(s_name) for pattern in patterns)
        ]

    all_symbols = get_symbols(lib)
    num_workers = 32
    # Ceiling division so the chunks together cover every symbol.
    chunk_size = (len(all_symbols) + num_workers - 1) // num_workers

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        tasks = [
            executor.submit(
                _grep_symbols,
                all_symbols[i * chunk_size : (i + 1) * chunk_size],
                patterns,
            )
            for i in range(num_workers)
        ]
        # Collect in submission order to keep the original symbol ordering.
        return list(itertools.chain.from_iterable(task.result() for task in tasks))
|  | ||||
|  | ||||
def check_lib_symbols_for_abi_correctness(lib: str, pre_cxx11_abi: bool = True) -> None:
    """Verify that ``lib`` only exposes symbols of the expected C++ ABI.

    Args:
        lib: path of the library to inspect.
        pre_cxx11_abi: ``True`` if the library should use the pre-cxx11 ABI,
            ``False`` if it should use the cxx11 ABI.

    Raises:
        RuntimeError: if symbols of the other ABI are present, if too few
            symbols of the expected ABI are found (fewer than 1000 for
            pre-cxx11, fewer than 100 for cxx11), or if a pre-cxx11 library
            references ``std::filesystem::recursive_directory_iterator``.
    """
    print(f"lib: {lib}")
    cxx11_matches = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
    pre_cxx11_matches = grep_symbols(lib, LIBTORCH_PRE_CXX11_PATTERNS)
    print(f"num_cxx11_symbols: {len(cxx11_matches)}")
    print(f"num_pre_cxx11_symbols: {len(pre_cxx11_matches)}")

    if not pre_cxx11_abi:
        # cxx11 build: no old-ABI symbols allowed, and a minimum of new ones.
        if pre_cxx11_matches:
            raise RuntimeError(
                f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_matches[:100]}"
            )
        if len(cxx11_matches) < 100:
            raise RuntimeError("Didn't find enought cxx11 symbols")
        return

    # pre-cxx11 build: no new-ABI symbols allowed, and a minimum of old ones.
    if cxx11_matches:
        raise RuntimeError(
            f"Found cxx11 symbols, but there shouldn't be any, see: {cxx11_matches[:100]}"
        )
    if len(pre_cxx11_matches) < 1000:
        raise RuntimeError("Didn't find enough pre-cxx11 symbols.")
    # Check for no recursive iterators, regression test for
    # https://github.com/pytorch/pytorch/issues/133437
    recursive_iterator_pattern = re.compile(
        "std::filesystem::recursive_directory_iterator.*"
    )
    rec_iter_matches = grep_symbols(lib, [recursive_iterator_pattern])
    if rec_iter_matches:
        raise RuntimeError(
            f"recursive_directory_iterator in used pre-CXX11 binaries, see; {rec_iter_matches}"
        )
|  | ||||
|  | ||||
def main() -> None:
    """Locate ``libtorch_cpu.so`` and check its symbols against the expected ABI.

    The install root is taken from the ``install_root`` environment variable
    when it is set; otherwise it is the current directory for
    ``PACKAGE_TYPE=libtorch`` and the ``torch`` package directory inside the
    Python library path for everything else. The pre-cxx11 ABI is expected
    unless ``DESIRED_DEVTOOLSET`` contains ``cxx11-abi``.
    """
    root_override = os.environ.get("install_root")  # noqa: SIM112
    if root_override is not None:
        install_root = Path(root_override)
    elif os.getenv("PACKAGE_TYPE") == "libtorch":
        install_root = Path(os.getcwd())
    else:
        install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"

    desired_devtoolset = os.getenv("DESIRED_DEVTOOLSET", "")
    check_lib_symbols_for_abi_correctness(
        install_root / "lib" / "libtorch_cpu.so",
        "cxx11-abi" not in desired_devtoolset,
    )


if __name__ == "__main__":
    main()
| @ -1,205 +0,0 @@ | ||||
| import argparse | ||||
|  | ||||
| from torchvision import datasets, transforms | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
| import torch.optim as optim | ||||
| from torch.optim.lr_scheduler import StepLR | ||||
|  | ||||
|  | ||||
class Net(nn.Module):
    """Small CNN classifier: two 3x3 convolutions followed by two linear layers.

    ``fc1`` takes 9216 features, which is what 1x28x28 inputs produce after
    the two unpadded convolutions and the 2x2 max-pool (64 * 12 * 12).
    ``forward`` returns log-probabilities over 10 classes.
    """

    def __init__(self):
        super(Net, self).__init__()  # noqa: UP008
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        # Convolutional feature extractor.
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.dropout1(F.max_pool2d(x, 2))
        # Classifier head on the flattened feature map.
        x = F.relu(self.fc1(torch.flatten(x, 1)))
        x = self.fc2(self.dropout2(x))
        return F.log_softmax(x, dim=1)
|  | ||||
|  | ||||
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch of ``model`` over ``train_loader``.

    Each batch is moved to ``device``, then a negative-log-likelihood loss is
    back-propagated and ``optimizer`` takes one step. Progress is printed
    every ``args.log_interval`` batches; with ``args.dry_run`` set, the epoch
    stops right after the first progress line.
    """
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        data, target = (tensor.to(device) for tensor in batch)
        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue
        print(
            f"Train Epoch: {epoch} [{batch_idx * len(data)}/{len(train_loader.dataset)} ({100. * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"  # noqa: B950
        )
        if args.dry_run:
            break
|  | ||||
|  | ||||
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    The model is switched to eval mode and gradients are disabled. The loss
    is the summed negative log-likelihood divided by the dataset size.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # Sum (not mean) per batch so the final average is per sample.
            total_loss += F.nll_loss(output, target, reduction="sum").item()
            # The index of the largest log-probability is the predicted class.
            pred = output.argmax(dim=1, keepdim=True)
            num_correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    average_loss = total_loss / num_samples

    print(
        f"\nTest set: Average loss: {average_loss:.4f}, Accuracy: {num_correct}/{num_samples} ({100. * num_correct / num_samples:.0f}%)\n"  # noqa: B950
    )
|  | ||||
|  | ||||
def timed(fn):
    """Call ``fn()`` and return ``(result, elapsed_seconds)``.

    When CUDA is available the time is measured with CUDA events around the
    call and read back after synchronizing. Without CUDA (for example the cpu
    or mps devices that ``main`` can select) CUDA events cannot be used, so
    the measurement falls back to a wall-clock timer.
    """
    if not torch.cuda.is_available():
        import time

        started_at = time.perf_counter()
        result = fn()
        return result, time.perf_counter() - started_at

    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    result = fn()
    end.record()
    # elapsed_time is only valid once both events have completed.
    torch.cuda.synchronize()
    # elapsed_time reports milliseconds; convert to seconds.
    return result, start.elapsed_time(end) / 1000
|  | ||||
|  | ||||
def main():
    """Train and evaluate the MNIST ``Net`` under ``torch.compile``.

    Parses the command-line options, picks the device (CUDA, then MPS, then
    CPU), downloads MNIST into ``../data`` if needed, compiles the model in
    ``max-autotune`` mode and runs ``--epochs`` rounds of training and
    evaluation, printing how long each phase took. With ``--save-model`` the
    compiled model's state dict is written to ``mnist_cnn.pt``.
    """
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=4,
        metavar="N",
        # The help text previously advertised 14, which did not match the
        # actual default above.
        help="number of epochs to train (default: 4)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument(
        "--no-cuda", action="store_true", default=False, help="disables CUDA training"
    )
    parser.add_argument(
        "--no-mps",
        action="store_true",
        default=False,
        help="disables macOS GPU training",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="quickly check a single pass",
    )
    parser.add_argument(
        "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    use_mps = not args.no_mps and torch.backends.mps.is_available()

    torch.manual_seed(args.seed)
    torch.backends.cuda.matmul.allow_tf32 = True

    # Device preference: CUDA first, then MPS, then CPU.
    if use_cuda:
        device = torch.device("cuda")
    elif use_mps:
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    train_kwargs = {"batch_size": args.batch_size}
    test_kwargs = {"batch_size": args.test_batch_size}
    if use_cuda:
        # The same loader options are applied to both loaders on CUDA.
        cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    opt_model = torch.compile(model, mode="max-autotune")
    optimizer = optim.Adadelta(opt_model.parameters(), lr=args.lr)

    # Multiply the learning rate by gamma after every epoch.
    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        print(
            f"Training Time: {timed(lambda: train(args, opt_model, device, train_loader, optimizer, epoch))[1]}"
        )
        print(
            f"Evaluation Time: {timed(lambda: test(opt_model, device, test_loader))[1]}"
        )
        scheduler.step()

    if args.save_model:
        torch.save(opt_model.state_dict(), "mnist_cnn.pt")


if __name__ == "__main__":
    main()
| @ -1,385 +0,0 @@ | ||||
| import argparse | ||||
| import importlib | ||||
| import json | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
| from pathlib import Path | ||||
|  | ||||
| import torch | ||||
| import torch._dynamo | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
|  | ||||
|  | ||||
# GPU architecture version the build is expected to provide.
# MATRIX_GPU_ARCH_VERSION wins whenever it is present in the environment
# (even if empty); otherwise GPU_ARCH_VERSION is used and may be None.
if "MATRIX_GPU_ARCH_VERSION" in os.environ:
    gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION")
else:
    gpu_arch_ver = os.getenv("GPU_ARCH_VERSION")  # Use fallback if available
# Remaining build-matrix settings; each is None when its variable is unset.
gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")
channel = os.getenv("MATRIX_CHANNEL")
package_type = os.getenv("MATRIX_PACKAGE_TYPE")
# Target OS label; defaults to sys.platform when TARGET_OS is not set.
target_os = os.getenv("TARGET_OS", sys.platform)
# Directory three levels above this file; JSON inputs are resolved against it.
BASE_DIR = Path(__file__).parent.parent.parent

is_cuda_system = gpu_arch_type == "cuda"
# Age threshold, in days, applied by the nightly binary date checks.
NIGHTLY_ALLOWED_DELTA = 3

# Companion packages validated together with torch when package == "all".
#   name:      importable module name (passed to importlib.import_module)
#   extension: attribute on the module that exposes _check_cuda_version()
#   repo / smoke_test / repo_name: presumably the git URL, smoke-test command
#       and checkout directory of the package -- their consumers are not in
#       this part of the file; verify before relying on them.
MODULES = [
    {
        "name": "torchvision",
        "repo": "https://github.com/pytorch/vision.git",
        "smoke_test": "./vision/test/smoke_test.py",
        "extension": "extension",
        "repo_name": "vision",
    },
    {
        "name": "torchaudio",
        "repo": "https://github.com/pytorch/audio.git",
        "smoke_test": "./audio/test/smoke_test/smoke_test.py --no-ffmpeg",
        "extension": "_extension",
        "repo_name": "audio",
    },
]
|  | ||||
|  | ||||
class Net(nn.Module):
    """Minimal conv network used as a smoke-test model.

    Two unpadded 3x3 convolutions, a 2x2 max-pool and one linear layer that
    maps 9216 flattened features (what a 1x28x28 input produces) to a single
    output value. No activation functions are applied.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 1)

    def forward(self, x):
        features = self.conv2(self.conv1(x))
        pooled = F.max_pool2d(features, 2)
        return self.fc1(torch.flatten(pooled, 1))
|  | ||||
|  | ||||
def load_json_from_basedir(filename: str):
    """Parse and return the JSON file ``filename`` located under ``BASE_DIR``.

    Raises:
        ImportError: if the file does not exist or does not contain valid
            JSON; the underlying exception is chained as the cause.
    """
    json_path = BASE_DIR / filename
    try:
        with open(json_path) as fptr:
            parsed = json.load(fptr)
    except FileNotFoundError as exc:
        raise ImportError(f"File {filename} not found error: {exc.strerror}") from exc
    except json.JSONDecodeError as exc:
        raise ImportError(f"Invalid JSON {filename}") from exc
    return parsed
|  | ||||
|  | ||||
def read_release_matrix():
    """Return the parsed contents of ``release_matrix.json`` under ``BASE_DIR``."""
    release_matrix = load_json_from_basedir("release_matrix.json")
    return release_matrix
|  | ||||
|  | ||||
def test_numpy():
    """Smoke-check that a NumPy array can be converted into a torch tensor."""
    import numpy as np

    torch.tensor(np.arange(5))
|  | ||||
|  | ||||
def check_version(package: str) -> None:
    """Validate the installed torch (and companion package) versions.

    The expected stable version comes from ``release_matrix.json`` when
    ``RELEASE_VERSION`` is set, otherwise from ``MATRIX_STABLE_VERSION``.
    For the nightly channel the build dates are checked instead. When no
    stable version is known the check is skipped with a message.

    Args:
        package: ``"all"`` to also validate every entry of ``MODULES``.

    Raises:
        RuntimeError: if torch or one of the companion packages does not
            start with the expected version string.
    """
    release_version = os.getenv("RELEASE_VERSION")
    # if release_version is specified, use it to validate the packages
    if release_version:
        release_matrix = read_release_matrix()
        stable_version = release_matrix["torch"]
    else:
        stable_version = os.getenv("MATRIX_STABLE_VERSION")

    # only makes sense to check nightly package where dates are known
    if channel == "nightly":
        check_nightly_binaries_date(package)
    elif stable_version is not None:
        if not torch.__version__.startswith(stable_version):
            raise RuntimeError(
                f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
            )

        # release_matrix is only bound when release_version is set, which the
        # first half of this condition guarantees.
        if release_version and package == "all":
            for module in MODULES:
                imported_module = importlib.import_module(module["name"])
                module_version = imported_module.__version__
                expected_version = release_matrix[module["name"]]
                # Messages are built from adjacent literals rather than a
                # backslash continuation inside the string, which used to
                # embed the source indentation in the output.
                if not module_version.startswith(expected_version):
                    raise RuntimeError(
                        f"{module['name']} version mismatch, expected: "
                        f"{expected_version} for channel {channel}. But its {module_version}"
                    )
                print(
                    f"{module['name']} version actual: {module_version} expected: "
                    f"{expected_version} for channel {channel}."
                )

    else:
        print(f"Skip version check for channel {channel} as stable version is None")
|  | ||||
|  | ||||
def check_nightly_binaries_date(package: str) -> None:
    """Fail when the installed nightly binaries are too old.

    The build date is read from the ``dev<YYYYMMDD>`` tag inside the version
    string and compared with the current time against
    ``NIGHTLY_ALLOWED_DELTA`` days.

    Args:
        package: ``"all"`` to apply the check to every entry of ``MODULES``
            in addition to torch.

    Raises:
        RuntimeError: if torch or one of the companion packages is too old.
    """
    from datetime import datetime

    format_dt = "%Y%m%d"

    # Entries look like "dev20240101"; [3:] strips the "dev" prefix.
    torch_dev_tags = re.findall("dev\\d+", torch.__version__)
    torch_age = datetime.now() - datetime.strptime(torch_dev_tags[0][3:], format_dt)
    if torch_age.days >= NIGHTLY_ALLOWED_DELTA:
        raise RuntimeError(
            f"the binaries are from {torch_dev_tags} and are more than {NIGHTLY_ALLOWED_DELTA} days old!"
        )

    if package != "all":
        return

    for module in MODULES:
        module_version = importlib.import_module(module["name"]).__version__
        module_dev_tags = re.findall("dev\\d+", module_version)
        module_age = datetime.now() - datetime.strptime(
            module_dev_tags[0][3:], format_dt
        )
        print(f"Nightly date check for {module['name']} version {module_version}")
        # NOTE(review): this uses `>` while the torch check above uses `>=`;
        # confirm whether the one-day difference is intentional.
        if module_age.days > NIGHTLY_ALLOWED_DELTA:
            raise RuntimeError(
                f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {module_age}"
            )
|  | ||||
|  | ||||
def test_cuda_runtime_errors_captured() -> None:
    """Check that a failing CUDA async assert surfaces as a CUDA RuntimeError.

    Raises:
        RuntimeError: if no exception is raised at all, or (re-raised) if a
            RuntimeError occurs whose message does not mention "CUDA".
    """
    try:
        print("Testing test_cuda_runtime_errors_captured")
        torch._assert_async(torch.tensor(0, device="cuda"))
        torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
    except RuntimeError as e:
        if not re.search("CUDA", f"{e}"):
            raise e
        print(f"Caught CUDA exception with success: {e}")
        return
    # Reaching this point means neither assert raised.
    raise RuntimeError("Expected CUDA RuntimeError but have not received!")
|  | ||||
|  | ||||
def smoke_test_cuda(
    package: str, runtime_error_check: str, torch_compile_check: str
) -> None:
    """Validate the CUDA side of the installed binaries.

    Args:
        package: ``"all"`` to also query the CUDA version of every entry in
            ``MODULES`` (only on CUDA systems).
        runtime_error_check: ``"enabled"`` to also run
            ``test_cuda_runtime_errors_captured``.
        torch_compile_check: ``"enabled"`` to also run ``smoke_test_compile``.

    Raises:
        RuntimeError: if CUDA is expected but not available, or if the loaded
            CUDA version differs from ``gpu_arch_ver``.
    """
    if not torch.cuda.is_available() and is_cuda_system:
        raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")

    if package == "all" and is_cuda_system:
        for module in MODULES:
            imported_module = importlib.import_module(module["name"])
            # TBD for vision move extension module to private so it will
            # be _extension.
            extension_module = (
                imported_module.extension
                if module["extension"] == "extension"
                else imported_module._extension
            )
            version = extension_module._check_cuda_version()
            print(f"{module['name']} CUDA: {version}")

    # The compile smoke test is gated to Python versions below 3.13 and to
    # the Linux / macOS targets listed here.
    compile_requested = torch_compile_check == "enabled"
    python_supported = sys.version_info < (3, 13, 0)
    os_supported = target_os in ["linux", "linux-aarch64", "macos-arm64", "darwin"]
    if compile_requested and python_supported and os_supported:
        smoke_test_compile("cuda" if torch.cuda.is_available() else "cpu")

    if not torch.cuda.is_available():
        return

    if torch.version.cuda != gpu_arch_ver:
        raise RuntimeError(
            f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
        )
    print(f"torch cuda: {torch.version.cuda}")
    # todo add cudnn version validation
    print(f"torch cudnn: {torch.backends.cudnn.version()}")
    print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

    torch.cuda.init()
    print("CUDA initialized successfully")
    device_count = torch.cuda.device_count()
    print(f"Number of CUDA devices: {device_count}")
    for device_index in range(torch.cuda.device_count()):
        print(f"Device {device_index}: {torch.cuda.get_device_name(device_index)}")

    # nccl is available only on Linux
    if sys.platform in ["linux", "linux2"]:
        print(f"torch nccl version: {torch.cuda.nccl.version()}")

    if runtime_error_check == "enabled":
        test_cuda_runtime_errors_captured()
|  | ||||
|  | ||||
def smoke_test_conv2d() -> None:
    """Construct and run a few ``nn.Conv2d`` configurations as a smoke test.

    On the CPU one dilated convolution is applied to a random input. On CUDA
    systems a convolution is additionally run under autocast, and the dilated
    convolution is run in float16, float32 and float64.
    """
    import torch.nn as nn

    print("Testing smoke_test_conv2d")
    # With square kernels and equal stride
    m = nn.Conv2d(16, 33, 3, stride=2)
    # non-square kernels and unequal stride and with padding
    m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    assert m is not None
    # non-square kernels and unequal stride and with padding and dilation
    basic_conv = nn.Conv2d(
        16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)
    )
    cpu_input = torch.randn(20, 16, 50, 100)
    basic_conv(cpu_input)

    if not is_cuda_system:
        return

    print("Testing smoke_test_conv2d with cuda")
    conv = nn.Conv2d(3, 3, 3).cuda()
    x = torch.randn(1, 3, 24, 24, device="cuda")
    with torch.cuda.amp.autocast():
        out = conv(x)
    assert out is not None

    for dtype in (torch.float16, torch.float32, torch.float64):
        print(f"Testing smoke_test_conv2d with cuda for {dtype}")
        conv = basic_conv.to(dtype).cuda()
        cuda_input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
        output = conv(cuda_input)
        assert output is not None
|  | ||||
|  | ||||
| def test_linalg(device="cpu") -> None: | ||||
|     """Smoke-test ``torch.linalg.svd`` on the given device. | ||||
|  | ||||
|     Args: | ||||
|         device: Device string passed to ``torch.randn``. The extra | ||||
|             large-batch pass at the end only runs when this is exactly | ||||
|             ``"cuda"``. | ||||
|  | ||||
|     Checks the output shapes of the reduced and full SVD of a 5x3 | ||||
|     matrix and runs a batched (7, 5, 3) decomposition. | ||||
|  | ||||
|     NOTE(review): every ``torch.dist(...)`` result below is discarded, | ||||
|     so the reconstruction error is computed but never asserted on. | ||||
|     """ | ||||
|     print(f"Testing smoke_test_linalg on {device}") | ||||
|     A = torch.randn(5, 3, device=device) | ||||
|     # Reduced SVD: U has the same shape as A (5x3). | ||||
|     U, S, Vh = torch.linalg.svd(A, full_matrices=False) | ||||
|     assert ( | ||||
|         U.shape == A.shape | ||||
|         and S.shape == torch.Size([3]) | ||||
|         and Vh.shape == torch.Size([3, 3]) | ||||
|     ) | ||||
|     # Distance between A and its reconstruction (value is not checked). | ||||
|     torch.dist(A, U @ torch.diag(S) @ Vh) | ||||
|  | ||||
|     # Full SVD: U becomes square (5x5). | ||||
|     U, S, Vh = torch.linalg.svd(A) | ||||
|     assert ( | ||||
|         U.shape == torch.Size([5, 5]) | ||||
|         and S.shape == torch.Size([3]) | ||||
|         and Vh.shape == torch.Size([3, 3]) | ||||
|     ) | ||||
|     # Only the first 3 columns of U are used for the reconstruction. | ||||
|     torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh) | ||||
|  | ||||
|     # Batched input: a stack of seven 5x3 matrices. | ||||
|     A = torch.randn(7, 5, 3, device=device) | ||||
|     U, S, Vh = torch.linalg.svd(A, full_matrices=False) | ||||
|     torch.dist(A, U @ torch.diag_embed(S) @ Vh) | ||||
|  | ||||
|     if device == "cuda": | ||||
|         # Larger batched SVD per dtype; only "does not raise" is checked. | ||||
|         supported_dtypes = [torch.float32, torch.float64] | ||||
|         for dtype in supported_dtypes: | ||||
|             print(f"Testing smoke_test_linalg with cuda for {dtype}") | ||||
|             A = torch.randn(20, 16, 50, 100, device=device, dtype=dtype) | ||||
|             torch.linalg.svd(A) | ||||
|  | ||||
|  | ||||
| def smoke_test_compile(device: str = "cpu") -> None: | ||||
|     """Smoke-test ``torch.compile`` on the given device. | ||||
|  | ||||
|     Args: | ||||
|         device: Device string used to create the test tensors and to | ||||
|             place the model. | ||||
|  | ||||
|     Raises: | ||||
|         RuntimeError: If ``device`` is ``"cpu"`` and ``pick_vec_isa()`` | ||||
|             returns a falsy value. | ||||
|  | ||||
|     For float16/float32/float64 the compiled ``sin + cos`` function is | ||||
|     compared against eager execution with ``torch.testing.assert_close``. | ||||
|     Afterwards a ``Net`` model (defined outside this function) is | ||||
|     compiled with ``mode="max-autotune"`` and run once. | ||||
|     """ | ||||
|     supported_dtypes = [torch.float16, torch.float32, torch.float64] | ||||
|  | ||||
|     def foo(x: torch.Tensor) -> torch.Tensor: | ||||
|         return torch.sin(x) + torch.cos(x) | ||||
|  | ||||
|     for dtype in supported_dtypes: | ||||
|         print(f"Testing smoke_test_compile for {device} and {dtype}") | ||||
|         x = torch.rand(3, 3, device=device).type(dtype) | ||||
|         x_eager = foo(x) | ||||
|         x_pt2 = torch.compile(foo)(x) | ||||
|         torch.testing.assert_close(x_eager, x_pt2) | ||||
|  | ||||
|     # Check that SIMD were detected for the architecture | ||||
|     if device == "cpu": | ||||
|         from torch._inductor.codecache import pick_vec_isa | ||||
|  | ||||
|         isa = pick_vec_isa() | ||||
|         if not isa: | ||||
|             raise RuntimeError("Can't detect vectorized ISA for CPU") | ||||
|         print(f"Picked CPU ISA {type(isa).__name__} bit width {isa.bit_width()}") | ||||
|  | ||||
|     # Reset torch dynamo since we are changing mode | ||||
|     torch._dynamo.reset() | ||||
|     dtype = torch.float32 | ||||
|     torch.set_float32_matmul_precision("high") | ||||
|     print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}") | ||||
|     x = torch.rand(64, 1, 28, 28, device=device).type(torch.float32) | ||||
|     model = Net().to(device=device) | ||||
|     # NOTE(review): the compiled output is not compared with eager output; | ||||
|     # this step only verifies that compilation and one forward pass succeed. | ||||
|     x_pt2 = torch.compile(model, mode="max-autotune")(x) | ||||
|  | ||||
|  | ||||
| def smoke_test_modules(): | ||||
|     """Run the per-module smoke-test scripts listed in ``MODULES``. | ||||
|  | ||||
|     ``MODULES`` and ``target_os`` are defined outside this function. Each | ||||
|     entry is read via the keys ``"repo"``, ``"repo_name"``, | ||||
|     ``"smoke_test"`` and ``"name"``. Entries with a falsy ``"repo"`` are | ||||
|     skipped entirely. | ||||
|  | ||||
|     Raises: | ||||
|         RuntimeError: If cloning a repository or running a smoke-test | ||||
|             command exits with a non-zero status; the captured output is | ||||
|             included in the message. | ||||
|     """ | ||||
|     cwd = os.getcwd() | ||||
|     for module in MODULES: | ||||
|         if module["repo"]: | ||||
|             # Clone the repository only if it is not already checked out | ||||
|             # under the current working directory. | ||||
|             if not os.path.exists(f"{cwd}/{module['repo_name']}"): | ||||
|                 print(f"Path does not exist: {cwd}/{module['repo_name']}") | ||||
|                 try: | ||||
|                     # NOTE(review): shell=True with an interpolated string; | ||||
|                     # presumably safe because MODULES is a trusted constant | ||||
|                     # - verify. | ||||
|                     subprocess.check_output( | ||||
|                         f"git clone --depth 1 {module['repo']}", | ||||
|                         stderr=subprocess.STDOUT, | ||||
|                         shell=True, | ||||
|                     ) | ||||
|                 except subprocess.CalledProcessError as exc: | ||||
|                     raise RuntimeError( | ||||
|                         f"Cloning {module['repo']} FAIL: {exc.returncode} Output: {exc.output}" | ||||
|                     ) from exc | ||||
|             try: | ||||
|                 # The interpreter is invoked as "python" instead of | ||||
|                 # "python3" when target_os is "windows". | ||||
|                 smoke_test_command = f"python3 {module['smoke_test']}" | ||||
|                 if target_os == "windows": | ||||
|                     smoke_test_command = f"python {module['smoke_test']}" | ||||
|                 output = subprocess.check_output( | ||||
|                     smoke_test_command, | ||||
|                     stderr=subprocess.STDOUT, | ||||
|                     shell=True, | ||||
|                     universal_newlines=True, | ||||
|                 ) | ||||
|             except subprocess.CalledProcessError as exc: | ||||
|                 raise RuntimeError( | ||||
|                     f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}" | ||||
|                 ) from exc | ||||
|             else: | ||||
|                 print(f"Output: \n{output}\n") | ||||
|  | ||||
|  | ||||
| def main() -> None: | ||||
|     """Parse command-line options and run the smoke tests in order. | ||||
|  | ||||
|     Options: | ||||
|         --package: ``"all"`` (default) or ``"torchonly"``; the per-module | ||||
|             smoke tests run only for ``"all"``. | ||||
|         --runtime-error-check: ``"enabled"`` (default) or ``"disabled"``; | ||||
|             forwarded to ``smoke_test_cuda``. | ||||
|         --torch-compile-check: ``"enabled"`` (default) or ``"disabled"``; | ||||
|             forwarded to ``smoke_test_cuda``. | ||||
|     """ | ||||
|     parser = argparse.ArgumentParser() | ||||
|     parser.add_argument( | ||||
|         "--package", | ||||
|         help="Package to include in smoke testing", | ||||
|         type=str, | ||||
|         choices=["all", "torchonly"], | ||||
|         default="all", | ||||
|     ) | ||||
|     # NOTE(review): the help text reads "No Runtime Error check" although | ||||
|     # the option defaults to "enabled" - the wording looks misleading. | ||||
|     parser.add_argument( | ||||
|         "--runtime-error-check", | ||||
|         help="No Runtime Error check", | ||||
|         type=str, | ||||
|         choices=["enabled", "disabled"], | ||||
|         default="enabled", | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "--torch-compile-check", | ||||
|         help="Check torch compile", | ||||
|         type=str, | ||||
|         choices=["enabled", "disabled"], | ||||
|         default="enabled", | ||||
|     ) | ||||
|     options = parser.parse_args() | ||||
|     print(f"torch: {torch.__version__}") | ||||
|     print(torch.__config__.parallel_info()) | ||||
|  | ||||
|     check_version(options.package) | ||||
|     smoke_test_conv2d() | ||||
|     test_linalg() | ||||
|     test_numpy() | ||||
|     # Repeat the linalg test on CUDA when is_cuda_system is truthy. | ||||
|     if is_cuda_system: | ||||
|         test_linalg("cuda") | ||||
|  | ||||
|     if options.package == "all": | ||||
|         smoke_test_modules() | ||||
|  | ||||
|     smoke_test_cuda( | ||||
|         options.package, options.runtime_error_check, options.torch_compile_check | ||||
|     ) | ||||
|  | ||||
|  | ||||
| # Run the smoke tests only when this file is executed as a script. | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| @ -14,7 +14,7 @@ source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| # Do not change workspace permissions for ROCm CI jobs | ||||
| # as it can leave workspace with bad permissions for cancelled jobs | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && -d /var/lib/jenkins/workspace ]]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then | ||||
|   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96) | ||||
|   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace") | ||||
|   cleanup_workspace() { | ||||
| @ -48,17 +48,17 @@ NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}" | ||||
|  | ||||
| export VALGRIND=ON | ||||
| # export TORCH_INDUCTOR_INSTALL_GXX=ON | ||||
| if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   # clang9 appears to miscompile code involving std::optional<c10::SymInt>, | ||||
| if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then | ||||
|   # clang9 appears to miscompile code involving c10::optional<c10::SymInt>, | ||||
|   # such that valgrind complains along these lines: | ||||
|   # | ||||
|   # Conditional jump or move depends on uninitialised value(s) | ||||
|   #    at 0x40303A: ~optional_base (Optional.h:281) | ||||
|   #    by 0x40303A: call (Dispatcher.h:448) | ||||
|   #    by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:10) | ||||
|   #    by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:10) | ||||
|   #    by 0x403700: main (basic.cpp:16) | ||||
|   #  Uninitialised value was created by a stack allocation | ||||
|   #    at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:6) | ||||
|   #    at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:6) | ||||
|   # | ||||
|   # The problem does not appear with gcc or newer versions of clang (we tested | ||||
|   # clang14).  So we suppress valgrind testing for clang9 specifically. | ||||
| @ -72,7 +72,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   # | ||||
|   # using namespace at; | ||||
|   # | ||||
|   # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, std::optional<c10::SymInt> storage_offset) { | ||||
|   # Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) { | ||||
|   #   auto op = c10::Dispatcher::singleton() | ||||
|   #       .findSchemaOrThrow(at::_ops::as_strided::name, at::_ops::as_strided::overload_name) | ||||
|   #       .typed<at::_ops::as_strided::schema>(); | ||||
| @ -81,7 +81,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   # | ||||
|   # int main(int argv) { | ||||
|   #   Tensor b = empty({3, 4}); | ||||
|   #   auto z = call(b, b.sym_sizes(), b.sym_strides(), std::nullopt); | ||||
|   #   auto z = call(b, b.sym_sizes(), b.sym_strides(), c10::nullopt); | ||||
|   # } | ||||
|   export VALGRIND=OFF | ||||
| fi | ||||
| @ -169,13 +169,9 @@ fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   # Source Intel oneAPI environment script to enable xpu runtime related libraries | ||||
|   # refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html | ||||
|   # refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
|   if [ -f /opt/intel/oneapi/umf/latest/env/vars.sh ]; then | ||||
|     # shellcheck disable=SC1091 | ||||
|     source /opt/intel/oneapi/umf/latest/env/vars.sh | ||||
|   fi | ||||
|   # Check XPU status before testing | ||||
|   xpu-smi discovery | ||||
| fi | ||||
| @ -200,9 +196,6 @@ install_tlparse | ||||
| # ASAN test is not working | ||||
| if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then | ||||
|     export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true | ||||
|     if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|         export ASAN_OPTIONS="${ASAN_OPTIONS}:protect_shadow_gap=0" | ||||
|     fi | ||||
|     export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp | ||||
|     export PYTORCH_TEST_WITH_ASAN=1 | ||||
|     export PYTORCH_TEST_WITH_UBSAN=1 | ||||
| @ -240,8 +233,8 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then | ||||
|     # it depends on a ton of dynamic libraries that most programs aren't gonna | ||||
|     # have, and it applies to child processes. | ||||
|  | ||||
|     LD_PRELOAD=$(clang --print-file-name=libclang_rt.asan-x86_64.so) | ||||
|     export LD_PRELOAD | ||||
|     # TODO: get rid of the hardcoded path | ||||
|     export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so | ||||
|     # Disable valgrind for asan | ||||
|     export VALGRIND=OFF | ||||
|  | ||||
| @ -288,7 +281,7 @@ test_python_shard() { | ||||
|  | ||||
|   # modify LD_LIBRARY_PATH to ensure it has the conda env. | ||||
|   # This set of tests has been shown to be buggy without it for the split-build | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|   time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION | ||||
|  | ||||
|   assert_git_not_dirty | ||||
| } | ||||
| @ -300,7 +293,7 @@ test_python() { | ||||
| } | ||||
|  | ||||
|  | ||||
| test_dynamo_wrapped_shard() { | ||||
| test_dynamo_shard() { | ||||
|   if [[ -z "$NUM_TEST_SHARDS" ]]; then | ||||
|     echo "NUM_TEST_SHARDS must be defined to run a Python test shard" | ||||
|     exit 1 | ||||
| @ -314,8 +307,7 @@ test_dynamo_wrapped_shard() { | ||||
|     --exclude-distributed-tests \ | ||||
|     --exclude-torch-export-tests \ | ||||
|     --shard "$1" "$NUM_TEST_SHARDS" \ | ||||
|     --verbose \ | ||||
|     --upload-artifacts-while-running | ||||
|     --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -328,7 +320,6 @@ test_inductor_distributed() { | ||||
|   python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose | ||||
|   python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose | ||||
|   python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose | ||||
|   python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose | ||||
| @ -340,12 +331,11 @@ test_inductor_distributed() { | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose | ||||
|   python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose | ||||
|   python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose | ||||
|  | ||||
|   # this runs on both single-gpu and multi-gpu instance. It should be smart about skipping tests that aren't supported | ||||
|   # if the required number of gpus isn't available | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose | ||||
|   python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| @ -379,39 +369,22 @@ test_inductor_aoti() { | ||||
|   CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference | ||||
| } | ||||
|  | ||||
| test_inductor_cpp_wrapper() { | ||||
|   export TORCHINDUCTOR_CPP_WRAPPER=1 | ||||
| test_inductor_cpp_wrapper_abi_compatible() { | ||||
|   export TORCHINDUCTOR_ABI_COMPATIBLE=1 | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   # Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor | ||||
|   # unit tests with cpp wrapper. | ||||
|   python test/run_test.py --include inductor/test_torchinductor.py --verbose | ||||
|   echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1" | ||||
|   # cpu stack allocation causes segfault and needs more investigation | ||||
|   PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper | ||||
|   python test/run_test.py --include inductor/test_cuda_cpp_wrapper | ||||
|  | ||||
|  | ||||
|   # Run inductor benchmark tests with cpp wrapper. | ||||
|   # Skip benchmark tests if it's in rerun-disabled-mode. | ||||
|   if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]]; then | ||||
|     echo "skip dynamo benchmark tests for rerun-disabled-test" | ||||
|   else | ||||
|     echo "run dynamo benchmark tests with cpp wrapper" | ||||
|     python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \ | ||||
|   TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \ | ||||
|     --training --inductor --disable-cudagraphs --only vit_base_patch16_224 \ | ||||
|     --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv" | ||||
|  | ||||
|     python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|       --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|     python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|       --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|     python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|       --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|     python benchmarks/dynamo/check_accuracy.py \ | ||||
|       --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ | ||||
|       --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" | ||||
|   fi | ||||
|   python benchmarks/dynamo/check_accuracy.py \ | ||||
|     --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \ | ||||
|     --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv" | ||||
| } | ||||
|  | ||||
| # "Global" flags for inductor benchmarking controlled by TEST_CONFIG | ||||
| @ -428,10 +401,10 @@ pr_time_benchmarks() { | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks" | ||||
|   PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" "benchmarks/dynamo/pr_time_benchmarks/benchmarks" | ||||
|   echo "benchmark results on current PR: " | ||||
|   cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" | ||||
|   PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv" | ||||
|   cat  "$TEST_REPORTS_DIR/pr_time_benchmarks_after.txt" | ||||
|  | ||||
| } | ||||
|  | ||||
| if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then | ||||
| @ -539,7 +512,7 @@ test_perf_for_dashboard() { | ||||
|               "${target_flag[@]}" --"$mode" --"$dtype" --export --disable-cudagraphs "$@" \ | ||||
|               --output "$TEST_REPORTS_DIR/${backend}_export_${suite}_${dtype}_${mode}_${device}_${target}.csv" | ||||
|         fi | ||||
|         $TASKSET python "benchmarks/dynamo/$suite.py" \ | ||||
|         TORCHINDUCTOR_ABI_COMPATIBLE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \ | ||||
|             "${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \ | ||||
|             --output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv" | ||||
|       fi | ||||
| @ -594,6 +567,13 @@ test_single_dynamo_benchmark() { | ||||
|     test_perf_for_dashboard "$suite" \ | ||||
|       "${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}" | ||||
|   else | ||||
|     if [[ "${TEST_CONFIG}" == *aot_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then | ||||
|       # Test AOTInductor with the ABI-compatible mode on CI | ||||
|       # This can be removed once the ABI-compatible mode becomes default. | ||||
|       # For CPU device, we prefer non ABI-compatible mode on CI when testing AOTInductor. | ||||
|       export TORCHINDUCTOR_ABI_COMPATIBLE=1 | ||||
|     fi | ||||
|  | ||||
|     if [[ "${TEST_CONFIG}" == *_avx2* ]]; then | ||||
|       TEST_CONFIG=${TEST_CONFIG//_avx2/} | ||||
|     fi | ||||
| @ -615,15 +595,7 @@ test_single_dynamo_benchmark() { | ||||
| } | ||||
|  | ||||
| test_inductor_micro_benchmark() { | ||||
|   # torchao requires cuda 8.0 or above for bfloat16 support | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;8.6" | ||||
|   fi | ||||
|   install_torchao | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   if [[ "${TEST_CONFIG}" == *cpu* ]]; then | ||||
|     test_inductor_set_cpu_affinity | ||||
|   fi | ||||
|   python benchmarks/gpt_fast/benchmark.py --output "${TEST_REPORTS_DIR}/gpt_fast_benchmark.csv" | ||||
| } | ||||
|  | ||||
| @ -632,11 +604,6 @@ test_inductor_halide() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_inductor_triton_cpu() { | ||||
|   python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_dynamo_benchmark() { | ||||
|   # Usage: test_dynamo_benchmark huggingface 0 | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
| @ -674,12 +641,32 @@ test_inductor_torchbench_smoketest_perf() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   # Test some models in the cpp wrapper mode | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|     --bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|     --bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \ | ||||
|     --bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" | ||||
|   python benchmarks/dynamo/check_accuracy.py \ | ||||
|     --actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \ | ||||
|     --expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv" | ||||
|  | ||||
|   python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \ | ||||
|     --batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \ | ||||
|     --output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" | ||||
|   # The threshold value needs to be actively maintained to make this check useful | ||||
|   python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_training_smoketest.csv" -t 1.4 | ||||
|  | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/torchbench.py --device cuda --performance --bfloat16 --inference \ | ||||
|     --export-aot-inductor --only nanogpt --output "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" | ||||
|   # The threshold value needs to be actively maintained to make this check useful | ||||
|   # The perf number of nanogpt seems not very stable, e.g. | ||||
|   # https://github.com/pytorch/pytorch/actions/runs/7158691360/job/19491437314, | ||||
|   # and thus we lower its threshold to reduce flakiness. If this continues to be a problem, | ||||
|   # we switch to use some other model. | ||||
|   python benchmarks/dynamo/check_perf_csv.py -f "$TEST_REPORTS_DIR/inductor_inference_smoketest.csv" -t 4.9 | ||||
|  | ||||
|   # Check memory compression ratio for a few models | ||||
|   for test in hf_Albert timm_vision_transformer; do | ||||
|     python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --amp --training \ | ||||
| @ -723,10 +710,6 @@ test_inductor_set_cpu_affinity(){ | ||||
|     export KMP_BLOCKTIME=1 | ||||
|   fi | ||||
|   cores=$(test_inductor_get_core_number) | ||||
|   # Set number of cores to 16 on Aarch64 for performance runs. | ||||
|   if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then | ||||
|     cores=16 | ||||
|   fi | ||||
|   export OMP_NUM_THREADS=$cores | ||||
|   end_core=$((cores-1)) | ||||
|   export TASKSET="taskset -c 0-$end_core" | ||||
| @ -763,9 +746,19 @@ test_inductor_torchbench_cpu_smoketest_perf(){ | ||||
|     fi | ||||
|     cat "$output_name" | ||||
|     # The threshold value needs to be actively maintained to make this check useful. | ||||
|     # Allow 1% variance for CPU perf to accommodate perf fluctuation | ||||
|     python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target" -s 0.99 | ||||
|     python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target" | ||||
|   done | ||||
|  | ||||
|   # Add a few ABI-compatible accuracy tests for CPU. These can be removed once we turn on ABI-compatible as default. | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \ | ||||
|     --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only adv_inception_v3 \ | ||||
|     --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" | ||||
|   TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \ | ||||
|     --bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only beit_base_patch16_224 \ | ||||
|     --output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" | ||||
|   python benchmarks/dynamo/check_accuracy.py \ | ||||
|     --actual "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" \ | ||||
|     --expected "benchmarks/dynamo/ci_expected_accuracy/aot_inductor_timm_inference.csv" | ||||
| } | ||||
|  | ||||
| test_torchbench_gcp_smoketest(){ | ||||
| @ -823,7 +816,7 @@ test_without_numpy() { | ||||
|   # Regression test for https://github.com/pytorch/pytorch/issues/66353 | ||||
|   python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;print(torch.tensor([torch.tensor(0.), torch.tensor(1.)]))" | ||||
|   # Regression test for https://github.com/pytorch/pytorch/issues/109387 | ||||
|   if [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then | ||||
|   if [[ "${TEST_CONFIG}" == *dynamo* ]]; then | ||||
|     python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')" | ||||
|   fi | ||||
|   popd | ||||
| @ -957,9 +950,6 @@ test_distributed() { | ||||
|     python test/run_test.py --cpp --verbose -i cpp/HashStoreTest | ||||
|     python test/run_test.py --cpp --verbose -i cpp/TCPStoreTest | ||||
|  | ||||
|     echo "Testing multi-GPU linalg tests" | ||||
|     python test/run_test.py -i test_linalg.py -k test_matmul_offline_mgpu_tunable --verbose | ||||
|  | ||||
|     if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
|       MPIEXEC=$(command -v mpiexec) | ||||
|       if [[ -n "$MPIEXEC" ]]; then | ||||
| @ -1209,7 +1199,7 @@ EOF | ||||
|   git reset --hard "${SHA_TO_COMPARE}" | ||||
|   git submodule sync && git submodule update --init --recursive | ||||
|   echo "::group::Installing Torch From Base Commit" | ||||
|   pip3 install -r requirements.txt | ||||
|   pip install -r requirements.txt | ||||
|   # shellcheck source=./common-build.sh | ||||
|   source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh" | ||||
|   python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist" | ||||
| @ -1366,11 +1356,10 @@ test_executorch() { | ||||
|   export EXECUTORCH_BUILD_PYBIND=ON | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|  | ||||
|   # For llama3 | ||||
|   bash examples/models/llama3_2_vision/install_requirements.sh | ||||
|   # NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch | ||||
|   # from the PR | ||||
|   bash .ci/scripts/setup-linux.sh cmake | ||||
|   # shellcheck disable=SC1091 | ||||
|   source .ci/scripts/setup-linux.sh cmake | ||||
|  | ||||
|   echo "Run ExecuTorch unit tests" | ||||
|   pytest -v -n auto | ||||
| @ -1380,7 +1369,7 @@ test_executorch() { | ||||
|   echo "Run ExecuTorch regression tests for some models" | ||||
|   # TODO(huydhn): Add more coverage here using ExecuTorch's gather models script | ||||
|   # shellcheck disable=SC1091 | ||||
|   source .ci/scripts/test_model.sh mv3 cmake xnnpack-quantization-delegation '' | ||||
|   source .ci/scripts/test.sh mv3 cmake xnnpack-quantization-delegation '' | ||||
|  | ||||
|   popd | ||||
|  | ||||
| @ -1391,17 +1380,14 @@ test_executorch() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_linux_aarch64() { | ||||
| test_linux_aarch64(){ | ||||
|   python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \ | ||||
|         test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \ | ||||
|         test_foreach test_reductions test_unary_ufuncs \ | ||||
|         --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
|        test_transformers test_multiprocessing test_numpy_interop --verbose | ||||
|  | ||||
|   # Dynamo tests | ||||
|   python test/run_test.py --include dynamo/test_compile dynamo/test_backends dynamo/test_comptime dynamo/test_config \ | ||||
|        dynamo/test_functions dynamo/test_fx_passes_pre_grad dynamo/test_interop dynamo/test_model_output dynamo/test_modules \ | ||||
|        dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles \ | ||||
|        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
|        dynamo/test_optimizers dynamo/test_recompile_ux dynamo/test_recompiles --verbose | ||||
|  | ||||
|   # Inductor tests | ||||
|   python test/run_test.py --include inductor/test_torchinductor inductor/test_benchmark_fusion inductor/test_codecache \ | ||||
| @ -1411,20 +1397,14 @@ test_linux_aarch64() { | ||||
|        inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \ | ||||
|        inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \ | ||||
|        inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \ | ||||
|        inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \ | ||||
|        inductor/test_triton_cpu_backend inductor/test_triton_extension_backend inductor/test_mkldnn_pattern_matcher inductor/test_cpu_cpp_wrapper \ | ||||
|        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
|        inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes --verbose | ||||
| } | ||||
|  | ||||
| if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then | ||||
|   (cd test && python -c "import torch; print(torch.__config__.show())") | ||||
|   (cd test && python -c "import torch; print(torch.__config__.parallel_info())") | ||||
| fi | ||||
| if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then | ||||
|   # Install numpy-2.0.2 and compatible scipy & numba versions | ||||
|   python -mpip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0 | ||||
|   python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py | ||||
| elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then | ||||
|   test_linux_aarch64 | ||||
| elif [[ "${TEST_CONFIG}" == *backward* ]]; then | ||||
|   test_forward_backward_compatibility | ||||
| @ -1450,8 +1430,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then | ||||
|   test_inductor_distributed | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then | ||||
|   test_inductor_halide | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-triton-cpu* ]]; then | ||||
|   test_inductor_triton_cpu | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-micro-benchmark* ]]; then | ||||
|   test_inductor_micro_benchmark | ||||
| elif [[ "${TEST_CONFIG}" == *huggingface* ]]; then | ||||
| @ -1468,13 +1446,14 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then | ||||
|   else | ||||
|     install_torchaudio cuda | ||||
|   fi | ||||
|   install_torchtext | ||||
|   install_torchvision | ||||
|   TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git | ||||
|   id=$((SHARD_NUMBER-1)) | ||||
|   # https://github.com/opencv/opencv-python/issues/885 | ||||
|   pip_install opencv-python==4.8.0.74 | ||||
|   if [[ "${TEST_CONFIG}" == *inductor_torchbench_smoketest_perf* ]]; then | ||||
|     checkout_install_torchbench hf_Bert hf_Albert timm_vision_transformer | ||||
|     checkout_install_torchbench hf_Bert hf_Albert nanogpt timm_vision_transformer | ||||
|     PYTHONPATH=$(pwd)/torchbench test_inductor_torchbench_smoketest_perf | ||||
|   elif [[ "${TEST_CONFIG}" == *inductor_torchbench_cpu_smoketest_perf* ]]; then | ||||
|     checkout_install_torchbench timm_vision_transformer phlippe_densenet basic_gnn_edgecnn \ | ||||
| @ -1493,11 +1472,9 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then | ||||
|     fi | ||||
|     PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id" | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then | ||||
|   install_torchaudio cuda | ||||
| elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then | ||||
|   install_torchvision | ||||
|   checkout_install_torchbench hf_T5 llama moco | ||||
|   PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper | ||||
|   test_inductor_cpp_wrapper_abi_compatible | ||||
| elif [[ "${TEST_CONFIG}" == *inductor* ]]; then | ||||
|   install_torchvision | ||||
|   test_inductor_shard "${SHARD_NUMBER}" | ||||
| @ -1506,9 +1483,9 @@ elif [[ "${TEST_CONFIG}" == *inductor* ]]; then | ||||
|       test_inductor_distributed | ||||
|     fi | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then | ||||
| elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then | ||||
|   install_torchvision | ||||
|   test_dynamo_wrapped_shard "${SHARD_NUMBER}" | ||||
|   test_dynamo_shard "${SHARD_NUMBER}" | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|     test_aten | ||||
|   fi | ||||
|  | ||||
| @ -1,26 +0,0 @@ | ||||
| cmake_minimum_required(VERSION 3.0 FATAL_ERROR) | ||||
| project(simple-torch-test) | ||||
|  | ||||
| find_package(Torch REQUIRED) | ||||
| set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}") | ||||
|  | ||||
|  | ||||
| add_executable(simple-torch-test simple-torch-test.cpp) | ||||
| target_include_directories(simple-torch-test PRIVATE  ${TORCH_INCLUDE_DIRS}) | ||||
| target_link_libraries(simple-torch-test "${TORCH_LIBRARIES}") | ||||
| set_property(TARGET simple-torch-test PROPERTY CXX_STANDARD 17) | ||||
|  | ||||
| find_package(CUDAToolkit 11.8) | ||||
|  | ||||
| target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse CUDA::cublas CUDA::cusolver) | ||||
| find_library(CUDNN_LIBRARY NAMES cudnn) | ||||
| target_link_libraries(simple-torch-test  ${CUDNN_LIBRARY} ) | ||||
| if(MSVC) | ||||
|   file(GLOB TORCH_DLLS  "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll") | ||||
|   message("dlls to copy "  ${TORCH_DLLS}) | ||||
|   add_custom_command(TARGET simple-torch-test | ||||
|                      POST_BUILD | ||||
|                      COMMAND ${CMAKE_COMMAND} -E copy_if_different | ||||
|                      ${TORCH_DLLS} | ||||
|                      $<TARGET_FILE_DIR:simple-torch-test>) | ||||
| endif(MSVC) | ||||
| @ -1,15 +0,0 @@ | ||||
| #include <torch/torch.h> | ||||
|  | ||||
| int main(int argc, const char* argv[]) { | ||||
|     std::cout << "Checking that CUDA archs are setup correctly" << std::endl; | ||||
|     TORCH_CHECK(torch::rand({ 3, 5 }, torch::Device(torch::kCUDA)).defined(), "CUDA archs are not setup correctly"); | ||||
|  | ||||
|     // These have to run after CUDA is initialized | ||||
|  | ||||
|     std::cout << "Checking that magma is available" << std::endl; | ||||
|     TORCH_CHECK(torch::hasMAGMA(), "MAGMA is not available"); | ||||
|  | ||||
|     std::cout << "Checking that CuDNN is available" << std::endl; | ||||
|     TORCH_CHECK(torch::cuda::cudnn_is_available(), "CuDNN is not available"); | ||||
|     return 0; | ||||
| } | ||||
| @ -1,6 +0,0 @@ | ||||
| #include <torch/torch.h> | ||||
|  | ||||
| int main(int argc, const char* argv[]) { | ||||
|     TORCH_CHECK(torch::hasMKL(), "MKL is not available"); | ||||
|     return 0; | ||||
| } | ||||
| @ -1,7 +0,0 @@ | ||||
| #include <ATen/ATen.h> | ||||
| #include <torch/torch.h> | ||||
|  | ||||
| int main(int argc, const char* argv[]) { | ||||
|     TORCH_CHECK(at::globalContext().isXNNPACKAvailable(), "XNNPACK is not available"); | ||||
|     return 0; | ||||
| } | ||||
| @ -1,38 +0,0 @@ | ||||
| r""" | ||||
| It's used to check basic rnn features with cuda. | ||||
| For example, it would throw exception if some components are missing | ||||
| """ | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
| import torch.optim as optim | ||||
|  | ||||
|  | ||||
| class SimpleCNN(nn.Module): | ||||
|     def __init__(self): | ||||
|         super().__init__() | ||||
|         self.conv = nn.Conv2d(1, 1, 3) | ||||
|         self.pool = nn.MaxPool2d(2, 2) | ||||
|  | ||||
|     def forward(self, inputs): | ||||
|         output = self.pool(F.relu(self.conv(inputs))) | ||||
|         output = output.view(1) | ||||
|         return output | ||||
|  | ||||
|  | ||||
| # Mock one infer | ||||
| device = torch.device("cuda:0") | ||||
| net = SimpleCNN().to(device) | ||||
| net_inputs = torch.rand((1, 1, 5, 5), device=device) | ||||
| outputs = net(net_inputs) | ||||
| print(outputs) | ||||
|  | ||||
| criterion = nn.MSELoss() | ||||
| optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.1) | ||||
|  | ||||
| # Mock one step training | ||||
| label = torch.full((1,), 1.0, dtype=torch.float, device=device) | ||||
| loss = criterion(outputs, label) | ||||
| loss.backward() | ||||
| optimizer.step() | ||||
| @ -1,14 +0,0 @@ | ||||
| r""" | ||||
| It's used to check basic rnn features with cuda. | ||||
| For example, it would throw exception if missing some components are missing | ||||
| """ | ||||
|  | ||||
| import torch | ||||
| import torch.nn as nn | ||||
|  | ||||
|  | ||||
| device = torch.device("cuda:0") | ||||
| rnn = nn.RNN(10, 20, 2).to(device) | ||||
| inputs = torch.randn(5, 3, 10).to(device) | ||||
| h0 = torch.randn(2, 3, 20).to(device) | ||||
| output, hn = rnn(inputs, h0) | ||||
| @ -1,6 +0,0 @@ | ||||
| #include <torch/torch.h> | ||||
|  | ||||
| int main(int argc, const char* argv[]) { | ||||
|     TORCH_WARN("Simple test passed!"); | ||||
|     return 0; | ||||
| } | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	