mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-24 23:54:56 +08:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			benchmarki
			...
			v2.7.0-rc1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| cdd7a2c72b | |||
| d94ea2647c | 
| @ -20,7 +20,7 @@ cd / | ||||
| # on the mounted pytorch repo | ||||
| git config --global --add safe.directory /pytorch | ||||
| pip install -r /pytorch/requirements.txt | ||||
| pip install auditwheel==6.2.0 | ||||
| pip install auditwheel | ||||
| if [ "$DESIRED_CUDA" = "cpu" ]; then | ||||
|     echo "BASE_CUDA_VERSION is not set. Building cpu wheel." | ||||
|     #USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files | ||||
|  | ||||
| @ -31,47 +31,33 @@ def build_ArmComputeLibrary() -> None: | ||||
|         "build=native", | ||||
|     ] | ||||
|     acl_install_dir = "/acl" | ||||
|     acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary") | ||||
|     if os.path.isdir(acl_install_dir): | ||||
|         shutil.rmtree(acl_install_dir) | ||||
|     if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)): | ||||
|         check_call( | ||||
|             [ | ||||
|                 "git", | ||||
|                 "clone", | ||||
|                 "https://github.com/ARM-software/ComputeLibrary.git", | ||||
|                 "-b", | ||||
|                 "v25.02", | ||||
|                 "--depth", | ||||
|                 "1", | ||||
|                 "--shallow-submodules", | ||||
|             ] | ||||
|         ) | ||||
|     acl_checkout_dir = "ComputeLibrary" | ||||
|     os.makedirs(acl_install_dir) | ||||
|     check_call( | ||||
|         [ | ||||
|             "git", | ||||
|             "clone", | ||||
|             "https://github.com/ARM-software/ComputeLibrary.git", | ||||
|             "-b", | ||||
|             "v25.02", | ||||
|             "--depth", | ||||
|             "1", | ||||
|             "--shallow-submodules", | ||||
|         ] | ||||
|     ) | ||||
|  | ||||
|     check_call( | ||||
|         ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags, | ||||
|         ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"] | ||||
|         + acl_build_flags, | ||||
|         cwd=acl_checkout_dir, | ||||
|     ) | ||||
|     for d in ["arm_compute", "include", "utils", "support", "src", "build"]: | ||||
|     for d in ["arm_compute", "include", "utils", "support", "src"]: | ||||
|         shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}") | ||||
|  | ||||
|  | ||||
| def replace_tag(filename) -> None: | ||||
|     with open(filename) as f: | ||||
|         lines = f.readlines() | ||||
|     for i, line in enumerate(lines): | ||||
|         if line.startswith("Tag:"): | ||||
|             lines[i] = line.replace("-linux_", "-manylinux_2_28_") | ||||
|             print(f"Updated tag from {line} to {lines[i]}") | ||||
|             break | ||||
|  | ||||
|     with open(filename, "w") as f: | ||||
|         f.writelines(lines) | ||||
|  | ||||
|  | ||||
| def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
| def update_wheel(wheel_path, desired_cuda) -> None: | ||||
|     """ | ||||
|     Package the cuda wheel libraries | ||||
|     Update the cuda wheel libraries | ||||
|     """ | ||||
|     folder = os.path.dirname(wheel_path) | ||||
|     wheelname = os.path.basename(wheel_path) | ||||
| @ -102,19 +88,30 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|         "/usr/lib64/libgfortran.so.5", | ||||
|         "/acl/build/libarm_compute.so", | ||||
|         "/acl/build/libarm_compute_graph.so", | ||||
|         "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|     ] | ||||
|  | ||||
|     if "128" in desired_cuda: | ||||
|     if enable_cuda: | ||||
|         libs_to_copy += [ | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8", | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|             "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|             "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|         ] | ||||
|         if "126" in desired_cuda: | ||||
|             libs_to_copy += [ | ||||
|                 "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6", | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             ] | ||||
|         elif "128" in desired_cuda: | ||||
|             libs_to_copy += [ | ||||
|                 "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8", | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|             ] | ||||
|     else: | ||||
|         libs_to_copy += [ | ||||
|             "/opt/OpenBLAS/lib/libopenblas.so.0", | ||||
|         ] | ||||
|  | ||||
|     # Copy libraries to unzipped_folder/a/lib | ||||
|     for lib_path in libs_to_copy: | ||||
|         lib_name = os.path.basename(lib_path) | ||||
| @ -123,13 +120,6 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|             f"cd {folder}/tmp/torch/lib/; " | ||||
|             f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}" | ||||
|         ) | ||||
|  | ||||
|     # Make sure the wheel is tagged with manylinux_2_28 | ||||
|     for f in os.scandir(f"{folder}/tmp/"): | ||||
|         if f.is_dir() and f.name.endswith(".dist-info"): | ||||
|             replace_tag(f"{f.path}/WHEEL") | ||||
|             break | ||||
|  | ||||
|     os.mkdir(f"{folder}/cuda_wheel") | ||||
|     os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *") | ||||
|     shutil.move( | ||||
| @ -146,9 +136,6 @@ def complete_wheel(folder: str) -> str: | ||||
|     """ | ||||
|     wheel_name = list_dir(f"/{folder}/dist")[0] | ||||
|  | ||||
|     # Please note for cuda we don't run auditwheel since we use custom script to package | ||||
|     # the cuda dependencies to the wheel file using update_wheel() method. | ||||
|     # However we need to make sure filename reflects the correct Manylinux platform. | ||||
|     if "pytorch" in folder and not enable_cuda: | ||||
|         print("Repairing Wheel with AuditWheel") | ||||
|         check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) | ||||
| @ -160,14 +147,7 @@ def complete_wheel(folder: str) -> str: | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|     else: | ||||
|         repaired_wheel_name = wheel_name.replace( | ||||
|             "linux_aarch64", "manylinux_2_28_aarch64" | ||||
|         ) | ||||
|         print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}") | ||||
|         os.rename( | ||||
|             f"/{folder}/dist/{wheel_name}", | ||||
|             f"/{folder}/dist/{repaired_wheel_name}", | ||||
|         ) | ||||
|         repaired_wheel_name = wheel_name | ||||
|  | ||||
|     print(f"Copying {repaired_wheel_name} to artifacts") | ||||
|     shutil.copy2( | ||||
| @ -204,10 +184,8 @@ if __name__ == "__main__": | ||||
|     ).decode() | ||||
|  | ||||
|     print("Building PyTorch wheel") | ||||
|     build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " | ||||
|     # MAX_JOB=5 is not required for CPU backend (see commit 465d98b) | ||||
|     if enable_cuda: | ||||
|         build_vars = "MAX_JOBS=5 " + build_vars | ||||
|     build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 " | ||||
|     os.system("cd /pytorch; python setup.py clean") | ||||
|  | ||||
|     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||||
|     desired_cuda = os.getenv("DESIRED_CUDA") | ||||
| @ -254,6 +232,6 @@ if __name__ == "__main__": | ||||
|         print("Updating Cuda Dependency") | ||||
|         filename = os.listdir("/pytorch/dist/") | ||||
|         wheel_path = f"/pytorch/dist/{filename[0]}" | ||||
|         package_cuda_wheel(wheel_path, desired_cuda) | ||||
|         update_wheel(wheel_path, desired_cuda) | ||||
|     pytorch_wheel_name = complete_wheel("/pytorch/") | ||||
|     print(f"Build Complete. Created {pytorch_wheel_name}..") | ||||
|  | ||||
| @ -19,11 +19,13 @@ import boto3 | ||||
|  | ||||
| # AMI images for us-east-1, change the following based on your ~/.aws/config | ||||
| os_amis = { | ||||
|     "ubuntu18_04": "ami-078eece1d8119409f",  # login_name: ubuntu | ||||
|     "ubuntu20_04": "ami-052eac90edaa9d08f",  # login_name: ubuntu | ||||
|     "ubuntu22_04": "ami-0c6c29c5125214c77",  # login_name: ubuntu | ||||
|     "redhat8": "ami-0698b90665a2ddcf1",  # login_name: ec2-user | ||||
| } | ||||
|  | ||||
| ubuntu18_04_ami = os_amis["ubuntu18_04"] | ||||
| ubuntu20_04_ami = os_amis["ubuntu20_04"] | ||||
|  | ||||
|  | ||||
| @ -657,6 +659,18 @@ def configure_system( | ||||
|             "sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip" | ||||
|         ) | ||||
|     host.run_cmd("pip3 install dataclasses typing-extensions") | ||||
|     # Install and switch to gcc-8 on Ubuntu-18.04 | ||||
|     if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8": | ||||
|         host.run_cmd("sudo apt-get install -y g++-8 gfortran-8") | ||||
|         host.run_cmd( | ||||
|             "sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100" | ||||
|         ) | ||||
|         host.run_cmd( | ||||
|             "sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100" | ||||
|         ) | ||||
|         host.run_cmd( | ||||
|             "sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100" | ||||
|         ) | ||||
|     if not use_conda: | ||||
|         print("Installing Cython + numpy from PyPy") | ||||
|         host.run_cmd("sudo pip3 install Cython") | ||||
| @ -1012,7 +1026,7 @@ if __name__ == "__main__": | ||||
|         install_condaforge_python(host, args.python_version) | ||||
|         sys.exit(0) | ||||
|  | ||||
|     python_version = args.python_version if args.python_version is not None else "3.9" | ||||
|     python_version = args.python_version if args.python_version is not None else "3.8" | ||||
|  | ||||
|     if args.use_torch_from_pypi: | ||||
|         configure_system(host, compiler=args.compiler, python_version=python_version) | ||||
|  | ||||
| @ -10,3 +10,5 @@ example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are | ||||
| built on Jenkins and are used in triggered builds already have this | ||||
| environment variable set in their manifest. Also see | ||||
| `./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`. | ||||
|  | ||||
| Our Jenkins installation is located at https://ci.pytorch.org/jenkins/. | ||||
|  | ||||
| @ -13,6 +13,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then | ||||
|   echo 'Skipping tests' | ||||
|   exit 0 | ||||
| fi | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *-rocm* ]]; then | ||||
|   # temporary to locate some kernel issues on the CI nodes | ||||
|   export HSAKMT_DEBUG_LEVEL=4 | ||||
| fi | ||||
| # These additional packages are needed for circleci ROCm builds. | ||||
| if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then | ||||
|     # Need networkx 2.0 because bellmand_ford was moved in 2.1 . Scikit-image by | ||||
|  | ||||
| @ -34,5 +34,5 @@ See `build.sh` for valid build environments (it's the giant switch). | ||||
| ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest | ||||
|  | ||||
| # Set flags (see build.sh) and build image | ||||
| sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest | ||||
| sudo bash -c 'PROTOBUF=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest | ||||
| ``` | ||||
|  | ||||
| @ -1,7 +1,6 @@ | ||||
| ARG CUDA_VERSION=12.4 | ||||
| ARG BASE_TARGET=cuda${CUDA_VERSION} | ||||
| ARG ROCM_IMAGE=rocm/dev-almalinux-8:6.3-complete | ||||
| FROM amd64/almalinux:8.10-20250519 as base | ||||
| FROM amd64/almalinux:8 as base | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| @ -9,10 +8,12 @@ ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| RUN yum -y update | ||||
| RUN yum -y install epel-release | ||||
| # install glibc-langpack-en make sure en_US.UTF-8 locale is available | ||||
| RUN yum -y install glibc-langpack-en | ||||
| RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain | ||||
| # Just add everything as a safe.directory for git since these will be used in multiple places with git | ||||
| RUN git config --global --add safe.directory '*' | ||||
| @ -40,12 +41,9 @@ RUN bash ./install_conda.sh && rm install_conda.sh | ||||
|  | ||||
| # Install CUDA | ||||
| FROM base as cuda | ||||
| ARG CUDA_VERSION=12.6 | ||||
| ARG CUDA_VERSION=12.4 | ||||
| RUN rm -rf /usr/local/cuda-* | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION} | ||||
| # Preserve CUDA_VERSION for the builds | ||||
| ENV CUDA_VERSION=${CUDA_VERSION} | ||||
| @ -56,20 +54,18 @@ FROM cuda as cuda11.8 | ||||
| RUN bash ./install_cuda.sh 11.8 | ||||
| ENV DESIRED_CUDA=11.8 | ||||
|  | ||||
| FROM cuda as cuda12.1 | ||||
| RUN bash ./install_cuda.sh 12.1 | ||||
| ENV DESIRED_CUDA=12.1 | ||||
|  | ||||
| FROM cuda as cuda12.4 | ||||
| RUN bash ./install_cuda.sh 12.4 | ||||
| ENV DESIRED_CUDA=12.4 | ||||
|  | ||||
| FROM cuda as cuda12.6 | ||||
| RUN bash ./install_cuda.sh 12.6 | ||||
| ENV DESIRED_CUDA=12.6 | ||||
|  | ||||
| FROM cuda as cuda12.8 | ||||
| RUN bash ./install_cuda.sh 12.8 | ||||
| ENV DESIRED_CUDA=12.8 | ||||
|  | ||||
| FROM ${ROCM_IMAGE} as rocm | ||||
| ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
| ENV MKLROOT /opt/intel | ||||
|  | ||||
| # Install MNIST test data | ||||
| FROM base as mnist | ||||
| ADD ./common/install_mnist.sh install_mnist.sh | ||||
| @ -77,8 +73,9 @@ RUN bash ./install_mnist.sh | ||||
|  | ||||
| FROM base as all_cuda | ||||
| COPY --from=cuda11.8  /usr/local/cuda-11.8 /usr/local/cuda-11.8 | ||||
| COPY --from=cuda12.1  /usr/local/cuda-12.1 /usr/local/cuda-12.1 | ||||
| COPY --from=cuda12.4  /usr/local/cuda-12.4 /usr/local/cuda-12.4 | ||||
| COPY --from=cuda12.6  /usr/local/cuda-12.6 /usr/local/cuda-12.6 | ||||
| COPY --from=cuda12.4  /usr/local/cuda-12.8 /usr/local/cuda-12.8 | ||||
|  | ||||
| # Final step | ||||
| FROM ${BASE_TARGET} as final | ||||
|  | ||||
| @ -1,70 +1,82 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| set -exou pipefail | ||||
| set -eou pipefail | ||||
|  | ||||
| image="$1" | ||||
| shift | ||||
|  | ||||
| if [ -z "${image}" ]; then | ||||
|   echo "Usage: $0 IMAGENAME:ARCHTAG" | ||||
|   echo "Usage: $0 IMAGE" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # Go from imagename:tag to tag | ||||
| DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}') | ||||
| DOCKER_IMAGE_NAME="pytorch/${image}" | ||||
|  | ||||
| CUDA_VERSION="" | ||||
| ROCM_VERSION="" | ||||
| EXTRA_BUILD_ARGS="" | ||||
| if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then | ||||
|     # extract cuda version from image name and tag.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8 | ||||
|     CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}') | ||||
|     EXTRA_BUILD_ARGS="--build-arg CUDA_VERSION=${CUDA_VERSION}" | ||||
| elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then | ||||
|     # extract rocm version from image name and tag.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4 | ||||
|     ROCM_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}') | ||||
|     EXTRA_BUILD_ARGS="--build-arg ROCM_IMAGE=rocm/dev-almalinux-8:${ROCM_VERSION}-complete" | ||||
| fi | ||||
|  | ||||
| case ${DOCKER_TAG_PREFIX} in | ||||
|   cpu) | ||||
|     BASE_TARGET=base | ||||
|     ;; | ||||
|   cuda*) | ||||
|     BASE_TARGET=cuda${CUDA_VERSION} | ||||
|     ;; | ||||
|   rocm*) | ||||
|     BASE_TARGET=rocm | ||||
|     ;; | ||||
|   *) | ||||
|     echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}" | ||||
|     exit 1 | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
| # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
| sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
| sudo systemctl daemon-reload | ||||
| sudo systemctl restart docker | ||||
|  | ||||
| export DOCKER_BUILDKIT=1 | ||||
| TOPDIR=$(git rev-parse --show-toplevel) | ||||
| tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') | ||||
|  | ||||
| docker build \ | ||||
|   --target final \ | ||||
|   --progress plain \ | ||||
|   --build-arg "BASE_TARGET=${BASE_TARGET}" \ | ||||
|   --build-arg "DEVTOOLSET_VERSION=11" \ | ||||
|   ${EXTRA_BUILD_ARGS} \ | ||||
|   -t ${tmp_tag} \ | ||||
|   $@ \ | ||||
|   -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \ | ||||
|   ${TOPDIR}/.ci/docker/ | ||||
| CUDA_VERSION=${CUDA_VERSION:-12.1} | ||||
|  | ||||
| if [ -n "${CUDA_VERSION}" ]; then | ||||
| case ${CUDA_VERSION} in | ||||
|   cpu) | ||||
|     BASE_TARGET=base | ||||
|     DOCKER_TAG=cpu | ||||
|     ;; | ||||
|   all) | ||||
|     BASE_TARGET=all_cuda | ||||
|     DOCKER_TAG=latest | ||||
|     ;; | ||||
|   *) | ||||
|     BASE_TARGET=cuda${CUDA_VERSION} | ||||
|     DOCKER_TAG=cuda${CUDA_VERSION} | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
|  | ||||
| ( | ||||
|   set -x | ||||
|   # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|   # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|   sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
|   sudo systemctl daemon-reload | ||||
|   sudo systemctl restart docker | ||||
|  | ||||
|   docker build \ | ||||
|     --target final \ | ||||
|     --progress plain \ | ||||
|     --build-arg "BASE_TARGET=${BASE_TARGET}" \ | ||||
|     --build-arg "CUDA_VERSION=${CUDA_VERSION}" \ | ||||
|     --build-arg "DEVTOOLSET_VERSION=11" \ | ||||
|     -t ${DOCKER_IMAGE_NAME} \ | ||||
|     $@ \ | ||||
|     -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \ | ||||
|     ${TOPDIR}/.ci/docker/ | ||||
| ) | ||||
|  | ||||
| if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then | ||||
|   # Test that we're using the right CUDA compiler | ||||
|   docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}" | ||||
|   ( | ||||
|     set -x | ||||
|     docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}" | ||||
|   ) | ||||
| fi | ||||
|  | ||||
| GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)} | ||||
| GIT_BRANCH_NAME=${GITHUB_REF##*/} | ||||
| GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)} | ||||
| DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME} | ||||
| DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA} | ||||
| if [[ "${WITH_PUSH:-}" == true ]]; then | ||||
|   ( | ||||
|     set -x | ||||
|     docker push "${DOCKER_IMAGE_NAME}" | ||||
|     if [[ -n ${GITHUB_REF} ]]; then | ||||
|         docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG} | ||||
|         docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG} | ||||
|         docker push "${DOCKER_IMAGE_BRANCH_TAG}" | ||||
|         docker push "${DOCKER_IMAGE_SHA_TAG}" | ||||
|     fi | ||||
|   ) | ||||
| fi | ||||
|  | ||||
| @ -85,6 +85,9 @@ elif [[ "$image" == *linter* ]]; then | ||||
|   DOCKERFILE="linter/Dockerfile" | ||||
| fi | ||||
|  | ||||
| # CMake 3.18 is needed to support CUDA17 language variant | ||||
| CMAKE_VERSION=3.18.5 | ||||
|  | ||||
| _UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb | ||||
| _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b | ||||
| if [[ "$image" == *rocm* ]]; then | ||||
| @ -92,32 +95,66 @@ if [[ "$image" == *rocm* ]]; then | ||||
|   _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d | ||||
| fi | ||||
|  | ||||
| tag=$(echo $image | awk -F':' '{print $2}') | ||||
|  | ||||
| # It's annoying to rename jobs every time you want to rewrite a | ||||
| # configuration, so we hardcode everything here rather than do it | ||||
| # from scratch | ||||
| case "$tag" in | ||||
| case "$image" in | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.8 | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.4.1 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.13 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
| @ -126,45 +163,57 @@ case "$tag" in | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6 | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6 | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6 | ||||
|   pytorch-linux-focal-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.6.3 | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.13 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
| @ -173,81 +222,115 @@ case "$tag" in | ||||
|     CUDNN_VERSION=9 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3-clang10-onnx) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=10 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     ONNX=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3.9-clang10) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=10 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     VULKAN_SDK_VERSION=1.2.162.1 | ||||
|     SWIFTSHADER=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3.11-clang10) | ||||
|     ANACONDA_PYTHON_VERSION=3.11 | ||||
|     CLANG_VERSION=10 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     VULKAN_SDK_VERSION=1.2.162.1 | ||||
|     SWIFTSHADER=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-py3.9-gcc9) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=9 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-rocm-n-1-py3) | ||||
|   pytorch-linux-focal-rocm-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.2.4 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-rocm-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.3 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-rocm-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|   pytorch-linux-jammy-xpu-2024.0-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     ROCM_VERSION=6.4 | ||||
|     XPU_VERSION=0.5 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-2025.0-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-2025.1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.1 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|     pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     DOCS=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
| @ -257,30 +340,40 @@ case "$tag" in | ||||
|     CUDA_VERSION=11.8 | ||||
|     CUDNN_VERSION=9 | ||||
|     CLANG_VERSION=12 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-clang12-asan) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=12 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-clang15-asan) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     CLANG_VERSION=15 | ||||
|     CONDA_CMAKE=yes | ||||
|     VISION=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-clang18-asan) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     CLANG_VERSION=18 | ||||
|     CONDA_CMAKE=yes | ||||
|     VISION=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.9-gcc11) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON=yes | ||||
|     DOCS=yes | ||||
|     UNINSTALL_DILL=yes | ||||
| @ -288,12 +381,14 @@ case "$tag" in | ||||
|   pytorch-linux-jammy-py3-clang12-executorch) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     CLANG_VERSION=12 | ||||
|     CONDA_CMAKE=yes | ||||
|     EXECUTORCH=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.12-halide) | ||||
|     CUDA_VERSION=12.6 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
|     HALIDE=yes | ||||
|     TRITON=yes | ||||
|     ;; | ||||
| @ -301,23 +396,29 @@ case "$tag" in | ||||
|     CUDA_VERSION=12.6 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=11 | ||||
|     CONDA_CMAKE=yes | ||||
|     TRITON_CPU=yes | ||||
|     ;; | ||||
|   pytorch-linux-focal-linter) | ||||
|     # TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627. | ||||
|     # We will need to update mypy version eventually, but that's for another day. The task | ||||
|     # would be to upgrade mypy to 1.0.0 with Python 3.11 | ||||
|     PYTHON_VERSION=3.9 | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CONDA_CMAKE=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter) | ||||
|     PYTHON_VERSION=3.9 | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CUDA_VERSION=11.8 | ||||
|     CONDA_CMAKE=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-aarch64-py3.10-gcc11) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     ACL=yes | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
|     SKIP_LLVM_SRC_BUILD_INSTALL=yes | ||||
| @ -326,7 +427,10 @@ case "$tag" in | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     ACL=yes | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
|     SKIP_LLVM_SRC_BUILD_INSTALL=yes | ||||
| @ -334,6 +438,8 @@ case "$tag" in | ||||
|     ;; | ||||
|   *) | ||||
|     # Catch-all for builds that are not hardcoded. | ||||
|     PROTOBUF=yes | ||||
|     DB=yes | ||||
|     VISION=yes | ||||
|     echo "image '$image' did not match an existing build configuration" | ||||
|     if [[ "$image" == *py* ]]; then | ||||
| @ -349,7 +455,8 @@ case "$tag" in | ||||
|       TRITON=yes | ||||
|       # To ensure that any ROCm config will build using conda cmake | ||||
|       # and thus have LAPACK/MKL enabled | ||||
|       fi | ||||
|       CONDA_CMAKE=yes | ||||
|     fi | ||||
|     if [[ "$image" == *centos7* ]]; then | ||||
|       NINJA_VERSION=1.10.2 | ||||
|     fi | ||||
| @ -365,6 +472,9 @@ case "$tag" in | ||||
|     if [[ "$image" == *glibc* ]]; then | ||||
|       extract_version_from_image_name glibc GLIBC_VERSION | ||||
|     fi | ||||
|     if [[ "$image" == *cmake* ]]; then | ||||
|       extract_version_from_image_name cmake CMAKE_VERSION | ||||
|     fi | ||||
|   ;; | ||||
| esac | ||||
|  | ||||
| @ -378,20 +488,14 @@ if [[ "$image" == *cuda*  && ${OS} == "ubuntu" ]]; then | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| no_cache_flag="" | ||||
| progress_flag="" | ||||
| # Do not use cache and progress=plain when in CI | ||||
| if [[ -n "${CI:-}" ]]; then | ||||
|   no_cache_flag="--no-cache" | ||||
|   progress_flag="--progress=plain" | ||||
| fi | ||||
|  | ||||
| # Build image | ||||
| docker build \ | ||||
|        ${no_cache_flag} \ | ||||
|        ${progress_flag} \ | ||||
|        --no-cache \ | ||||
|        --progress=plain \ | ||||
|        --build-arg "BUILD_ENVIRONMENT=${image}" \ | ||||
|        --build-arg "PROTOBUF=${PROTOBUF:-}" \ | ||||
|        --build-arg "LLVMDEV=${LLVMDEV:-}" \ | ||||
|        --build-arg "DB=${DB:-}" \ | ||||
|        --build-arg "VISION=${VISION:-}" \ | ||||
|        --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \ | ||||
|        --build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \ | ||||
| @ -399,12 +503,14 @@ docker build \ | ||||
|        --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \ | ||||
|        --build-arg "CLANG_VERSION=${CLANG_VERSION}" \ | ||||
|        --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \ | ||||
|        --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \ | ||||
|        --build-arg "GCC_VERSION=${GCC_VERSION}" \ | ||||
|        --build-arg "CUDA_VERSION=${CUDA_VERSION}" \ | ||||
|        --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \ | ||||
|        --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \ | ||||
|        --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \ | ||||
|        --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \ | ||||
|        --build-arg "SWIFTSHADER=${SWIFTSHADER}" \ | ||||
|        --build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \ | ||||
|        --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \ | ||||
|        --build-arg "KATEX=${KATEX:-}" \ | ||||
|        --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \ | ||||
| @ -412,6 +518,7 @@ docker build \ | ||||
|        --build-arg "IMAGE_NAME=${IMAGE_NAME}" \ | ||||
|        --build-arg "UCX_COMMIT=${UCX_COMMIT}" \ | ||||
|        --build-arg "UCC_COMMIT=${UCC_COMMIT}" \ | ||||
|        --build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \ | ||||
|        --build-arg "TRITON=${TRITON}" \ | ||||
|        --build-arg "TRITON_CPU=${TRITON_CPU}" \ | ||||
|        --build-arg "ONNX=${ONNX}" \ | ||||
| @ -420,7 +527,6 @@ docker build \ | ||||
|        --build-arg "EXECUTORCH=${EXECUTORCH}" \ | ||||
|        --build-arg "HALIDE=${HALIDE}" \ | ||||
|        --build-arg "XPU_VERSION=${XPU_VERSION}" \ | ||||
|        --build-arg "UNINSTALL_DILL=${UNINSTALL_DILL}" \ | ||||
|        --build-arg "ACL=${ACL:-}" \ | ||||
|        --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \ | ||||
|        --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \ | ||||
| @ -438,7 +544,7 @@ docker build \ | ||||
| UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//') | ||||
|  | ||||
| function drun() { | ||||
|   docker run --rm "$tmp_tag" "$@" | ||||
|   docker run --rm "$tmp_tag" $* | ||||
| } | ||||
|  | ||||
| if [[ "$OS" == "ubuntu" ]]; then | ||||
| @ -486,23 +592,3 @@ if [ -n "$KATEX" ]; then | ||||
|     exit 1 | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| HAS_TRITON=$(drun python -c "import triton" > /dev/null 2>&1 && echo "yes" || echo "no") | ||||
| if [[ -n "$TRITON" || -n "$TRITON_CPU" ]]; then | ||||
|   if [ "$HAS_TRITON" = "no" ]; then | ||||
|     echo "expecting triton to be installed, but it is not" | ||||
|     exit 1 | ||||
|   fi | ||||
| elif [ "$HAS_TRITON" = "yes" ]; then | ||||
|   echo "expecting triton to not be installed, but it is" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # Sanity check cmake version.  Executorch reinstalls cmake and I'm not sure if | ||||
| # they support 4.0.0 yet, so exclude them from this check. | ||||
| CMAKE_VERSION=$(drun cmake --version) | ||||
| if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *4.* ]]; then | ||||
|   echo "CMake version is not 4.0.0:" | ||||
|   drun cmake --version | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| @ -17,8 +17,9 @@ RUN bash ./install_base.sh && rm install_base.sh | ||||
| # Update CentOS git version | ||||
| RUN yum -y remove git | ||||
| RUN yum -y remove git-* | ||||
| RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \ | ||||
|     sed -i 's/packages.endpoint/packages.endpointdev/' /etc/yum.repos.d/endpoint.repo | ||||
| RUN yum -y install https://packages.endpoint.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm || \ | ||||
|     (yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \ | ||||
|     sed -i "s/packages.endpoint/packages.endpointdev/" /etc/yum.repos.d/endpoint.repo) | ||||
| RUN yum install -y git | ||||
|  | ||||
| # Install devtoolset | ||||
| @ -39,6 +40,7 @@ RUN bash ./install_user.sh && rm install_user.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| @ -46,6 +48,20 @@ COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
|  | ||||
| # (optional) Install protobuf for ONNX | ||||
| ARG PROTOBUF | ||||
| COPY ./common/install_protobuf.sh install_protobuf.sh | ||||
| RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi | ||||
| RUN rm install_protobuf.sh | ||||
| ENV INSTALLED_PROTOBUF ${PROTOBUF} | ||||
|  | ||||
| # (optional) Install database packages like LMDB and LevelDB | ||||
| ARG DB | ||||
| COPY ./common/install_db.sh install_db.sh | ||||
| RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi | ||||
| RUN rm install_db.sh | ||||
| ENV INSTALLED_DB ${DB} | ||||
|  | ||||
| # (optional) Install vision packages like OpenCV | ||||
| ARG VISION | ||||
| COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| @ -59,7 +75,7 @@ COPY ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh | ||||
| RUN rm install_rocm.sh | ||||
| COPY ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} | ||||
| RUN bash ./install_rocm_magma.sh | ||||
| RUN rm install_rocm_magma.sh | ||||
| COPY ./common/install_amdsmi.sh install_amdsmi.sh | ||||
| RUN bash ./install_amdsmi.sh | ||||
| @ -73,6 +89,12 @@ ENV MAGMA_HOME /opt/rocm/magma | ||||
| ENV LANG en_US.utf8 | ||||
| ENV LC_ALL en_US.utf8 | ||||
|  | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi | ||||
| RUN rm install_cmake.sh | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
| COPY ./common/install_ninja.sh install_ninja.sh | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| b173722085b3f555d6ba4533d6bbaddfd7c71144 | ||||
| 5e4d6b6380d575e48e37e9d987fded4ec588e7bc | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| v2.26.5-1 | ||||
| v2.25.1-1 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| b0e26b7359c147b8aa0af686c20510fb9b15990a | ||||
| 83111ab22be6e4a588d184ac45175986a7dde9fc | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| c8757738a7418249896224430ce84888e8ecdd79 | ||||
| 96316ce50fade7e209553aba4898cd9b82aab83b | ||||
|  | ||||
| @ -37,7 +37,7 @@ install_ubuntu() { | ||||
|   if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then | ||||
|     maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages" | ||||
|   elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then | ||||
|     maybe_libnccl_dev="libnccl2=2.26.2-1+cuda12.4 libnccl-dev=2.26.2-1+cuda12.4 --allow-downgrades --allow-change-held-packages" | ||||
|     maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages" | ||||
|   else | ||||
|     maybe_libnccl_dev="" | ||||
|   fi | ||||
| @ -99,6 +99,9 @@ install_centos() { | ||||
|  | ||||
|   ccache_deps="asciidoc docbook-dtds docbook-style-xsl libxslt" | ||||
|   numpy_deps="gcc-gfortran" | ||||
|   # Note: protobuf-c-{compiler,devel} on CentOS are too old to be used | ||||
|   # for Caffe2. That said, we still install them to make sure the build | ||||
|   # system opts to build/use protoc and libprotobuf from third-party. | ||||
|   yum install -y \ | ||||
|     $ccache_deps \ | ||||
|     $numpy_deps \ | ||||
|  | ||||
| @ -9,7 +9,7 @@ install_ubuntu() { | ||||
|   # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh` | ||||
|   apt-get install -y cargo | ||||
|   echo "Checking out sccache repo" | ||||
|   git clone https://github.com/mozilla/sccache -b v0.10.0 | ||||
|   git clone https://github.com/mozilla/sccache -b v0.9.1 | ||||
|   cd sccache | ||||
|   echo "Building sccache" | ||||
|   cargo build --release | ||||
|  | ||||
| @ -4,10 +4,16 @@ set -ex | ||||
|  | ||||
| if [ -n "$CLANG_VERSION" ]; then | ||||
|  | ||||
|   if [[ $UBUNTU_VERSION == 22.04 ]]; then | ||||
|   if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then | ||||
|     sudo apt-get update | ||||
|     # gpg-agent is not available by default on 18.04 | ||||
|     sudo apt-get install  -y --no-install-recommends gpg-agent | ||||
|     wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  - | ||||
|     apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main" | ||||
|   elif [[ $UBUNTU_VERSION == 22.04 ]]; then | ||||
|     # work around ubuntu apt-get conflicts | ||||
|     sudo apt-get -y -f install | ||||
|     wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - | ||||
|     wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add  - | ||||
|     if [[ $CLANG_VERSION == 18 ]]; then | ||||
|       apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main" | ||||
|     fi | ||||
| @ -35,7 +41,7 @@ if [ -n "$CLANG_VERSION" ]; then | ||||
|   # clang's packaging is a little messed up (the runtime libs aren't | ||||
|   # added into the linker path), so give it a little help | ||||
|   clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux") | ||||
|   echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf | ||||
|   echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf | ||||
|   ldconfig | ||||
|  | ||||
|   # Cleanup package manager | ||||
|  | ||||
							
								
								
									
										31
									
								
								.ci/docker/common/install_cmake.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										31
									
								
								.ci/docker/common/install_cmake.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,31 @@ | ||||
| #!/bin/bash | ||||
| # | ||||
| # Install the CMake release named by CMAKE_VERSION (e.g. 3.6.3) into | ||||
| # /usr/local, after removing the distro-packaged cmake. | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # With `set -e` active, this bare test aborts the script when | ||||
| # CMAKE_VERSION is empty or unset. | ||||
| [ -n "$CMAKE_VERSION" ] | ||||
|  | ||||
| # Remove system cmake install so it won't get used instead | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|   ubuntu) | ||||
|     apt-get remove cmake -y | ||||
|     ;; | ||||
|   centos) | ||||
|     yum remove cmake -y | ||||
|     ;; | ||||
|   *) | ||||
|     echo "Unable to determine OS..." | ||||
|     exit 1 | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| # Turn 3.6.3 into v3.6. The pattern is anchored at the start and the dot is | ||||
| # escaped so that only a leading "<major>.<minor>" is captured. | ||||
| path=$(echo "${CMAKE_VERSION}" | sed -e 's/^\([0-9]\+\.[0-9]\+\).*/v\1/') | ||||
| # NOTE(review): the asset name is hard-coded to the Linux-x86_64 tarball; | ||||
| # confirm that matches every image/version this script is used for. | ||||
| file="cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz" | ||||
|  | ||||
| # Download and install specific CMake version in /usr/local | ||||
| pushd /tmp | ||||
| # -f makes curl fail on an HTTP error instead of saving the error page as the | ||||
| # tarball; -S keeps the error message visible even though -s is set. | ||||
| curl -fsSO --retry 3 "https://cmake.org/files/${path}/${file}" | ||||
| # Operate on the exact file just downloaded rather than a cmake-*.tar.gz glob, | ||||
| # so unrelated tarballs already present in /tmp are never touched. | ||||
| tar -C /usr/local --strip-components 1 --no-same-owner -zxf "${file}" | ||||
| rm -f "${file}" | ||||
| popd | ||||
| @ -7,7 +7,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|   BASE_URL="https://repo.anaconda.com/miniconda" | ||||
|   CONDA_FILE="Miniconda3-latest-Linux-x86_64.sh" | ||||
|   if [[ $(uname -m) == "aarch64" ]] || [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|     BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"  # @lint-ignore | ||||
|     BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download" | ||||
|     CONDA_FILE="Miniforge3-Linux-$(uname -m).sh" | ||||
|   fi | ||||
|  | ||||
| @ -62,7 +62,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|  | ||||
|   # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30 | ||||
|   # which is provided in libstdcxx 12 and up. | ||||
|   conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge | ||||
|   conda_install libstdcxx-ng=12.3.0 -c conda-forge | ||||
|  | ||||
|   # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README | ||||
|   if [[ $(uname -m) == "aarch64" ]]; then | ||||
| @ -75,11 +75,19 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
|   # and libpython-static for torch deploy | ||||
|   conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}" | ||||
|  | ||||
|   # Use conda cmake in some cases. Conda cmake will be newer than our supported | ||||
|   # min version (3.5 for xenial and 3.10 for bionic), so we only do it in those | ||||
|   # following builds that we know should use conda. Specifically, Ubuntu bionic | ||||
|   # and focal cannot find conda mkl with stock cmake, so we need a cmake from conda | ||||
|   if [ -n "${CONDA_CMAKE}" ]; then | ||||
|     conda_install cmake | ||||
|   fi | ||||
|  | ||||
|   # Magma package names are concatenation of CUDA major and minor ignoring revision | ||||
|   # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89 | ||||
|   # Magma is installed from a tarball in the ossci-linux bucket into the conda env | ||||
|   if [ -n "$CUDA_VERSION" ]; then | ||||
|     conda_run ${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION}) | ||||
|     ${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION}) ${ANACONDA_PYTHON_VERSION} | ||||
|   fi | ||||
|  | ||||
|   # Install some other packages, including those needed for Python test reporting | ||||
|  | ||||
| @ -3,11 +3,11 @@ | ||||
| set -uex -o pipefail | ||||
|  | ||||
| PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python | ||||
| PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/heads  # @lint-ignore | ||||
| PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/heads | ||||
| GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py | ||||
|  | ||||
| # Python versions to be installed in /opt/$VERSION_NO | ||||
| CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"} | ||||
| CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"} | ||||
|  | ||||
| function check_var { | ||||
|     if [ -z "$1" ]; then | ||||
|  | ||||
| @ -2,82 +2,140 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| arch_path='' | ||||
| targetarch=${TARGETARCH:-$(uname -m)} | ||||
| if [ ${targetarch} = 'amd64' ] || [ "${targetarch}" = 'x86_64' ]; then | ||||
|   arch_path='x86_64' | ||||
| else | ||||
|   arch_path='sbsa' | ||||
| fi | ||||
| NCCL_VERSION=v2.25.1-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
|  | ||||
| function install_cuda { | ||||
|   version=$1 | ||||
|   runfile=$2 | ||||
|   major_minor=${version%.*} | ||||
|   rm -rf /usr/local/cuda-${major_minor} /usr/local/cuda | ||||
|   if [[ ${arch_path} == 'sbsa' ]]; then | ||||
|       runfile="${runfile}_sbsa" | ||||
|   fi | ||||
|   runfile="${runfile}.run" | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/${version}/local_installers/${runfile} -O ${runfile} | ||||
|   chmod +x ${runfile} | ||||
|   ./${runfile} --toolkit --silent | ||||
|   rm -f ${runfile} | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-${major_minor} /usr/local/cuda | ||||
| function install_cusparselt_040 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cudnn { | ||||
|   cuda_major_version=$1 | ||||
|   cudnn_version=$2 | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   filepath="cudnn-linux-${arch_path}-${cudnn_version}_cuda${cuda_major_version}-archive" | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-${arch_path}/${filepath}.tar.xz | ||||
|   tar xf ${filepath}.tar.xz | ||||
|   cp -a ${filepath}/include/* /usr/local/cuda/include/ | ||||
|   cp -a ${filepath}/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
| function install_cusparselt_062 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.2.3-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-x86_64-0.6.2.3-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.2.3-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_063 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz | ||||
|     tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_118 { | ||||
|     CUDNN_VERSION=9.1.0.70 | ||||
|     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.4.0" | ||||
|     install_cuda 11.8.0 cuda_11.8.0_520.61.05_linux | ||||
|     NCCL_VERSION=v2.21.5-1 | ||||
|     echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0" | ||||
|     rm -rf /usr/local/cuda-11.8 /usr/local/cuda | ||||
|     # install CUDA 11.8.0 in the same container | ||||
|     wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run | ||||
|     chmod +x cuda_11.8.0_520.61.05_linux.run | ||||
|     ./cuda_11.8.0_520.61.05_linux.run --toolkit --silent | ||||
|     rm -f cuda_11.8.0_520.61.05_linux.run | ||||
|     rm -f /usr/local/cuda && ln -s /usr/local/cuda-11.8 /usr/local/cuda | ||||
|  | ||||
|     install_cudnn 11 $CUDNN_VERSION | ||||
|     # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|     mkdir tmp_cudnn && cd tmp_cudnn | ||||
|     wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz | ||||
|     tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive.tar.xz | ||||
|     cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/include/* /usr/local/cuda/include/ | ||||
|     cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda11-archive/lib/* /usr/local/cuda/lib64/ | ||||
|     cd .. | ||||
|     rm -rf tmp_cudnn | ||||
|  | ||||
|     CUDA_VERSION=11.8 bash install_nccl.sh | ||||
|     # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|     # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|     git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|     cd nccl && make -j src.build | ||||
|     cp -a build/include/* /usr/local/cuda/include/ | ||||
|     cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|     cd .. | ||||
|     rm -rf nccl | ||||
|  | ||||
|     CUDA_VERSION=11.8 bash install_cusparselt.sh | ||||
|     install_cusparselt_040 | ||||
|  | ||||
|     ldconfig | ||||
| } | ||||
|  | ||||
| function install_124 { | ||||
|   CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.2" | ||||
|   install_cuda 12.4.1 cuda_12.4.1_550.54.15_linux | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
|   rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|   # install CUDA 12.4.1 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run | ||||
|   chmod +x cuda_12.4.1_550.54.15_linux.run | ||||
|   ./cuda_12.4.1_550.54.15_linux.run --toolkit --silent | ||||
|   rm -f cuda_12.4.1_550.54.15_linux.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda | ||||
|  | ||||
|   install_cudnn 12 $CUDNN_VERSION | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   CUDA_VERSION=12.4 bash install_nccl.sh | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   CUDA_VERSION=12.4 bash install_cusparselt.sh | ||||
|   install_cusparselt_062 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function install_126 { | ||||
|   CUDNN_VERSION=9.5.1.17 | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3" | ||||
|   install_cuda 12.6.3 cuda_12.6.3_560.35.05_linux | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|   # install CUDA 12.6.3 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run | ||||
|   chmod +x cuda_12.6.3_560.35.05_linux.run | ||||
|   ./cuda_12.6.3_560.35.05_linux.run --toolkit --silent | ||||
|   rm -f cuda_12.6.3_560.35.05_linux.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
|   install_cudnn 12 $CUDNN_VERSION | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   CUDA_VERSION=12.6 bash install_nccl.sh | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   CUDA_VERSION=12.6 bash install_cusparselt.sh | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
| @ -182,17 +240,35 @@ function prune_126 { | ||||
| } | ||||
|  | ||||
| function install_128 { | ||||
|   CUDNN_VERSION=9.8.0.87 | ||||
|   echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3" | ||||
|   # install CUDA 12.8.1 in the same container | ||||
|   install_cuda 12.8.1 cuda_12.8.1_570.124.06_linux | ||||
|   CUDNN_VERSION=9.7.1.26 | ||||
|   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|   # install CUDA 12.8.0 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run | ||||
|   chmod +x cuda_12.8.0_570.86.10_linux.run | ||||
|   ./cuda_12.8.0_570.86.10_linux.run --toolkit --silent | ||||
|   rm -f cuda_12.8.0_570.86.10_linux.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   install_cudnn 12 $CUDNN_VERSION | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   CUDA_VERSION=12.8 bash install_nccl.sh | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   CUDA_VERSION=12.8 bash install_cusparselt.sh | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
							
								
								
									
										211
									
								
								.ci/docker/common/install_cuda_aarch64.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										211
									
								
								.ci/docker/common/install_cuda_aarch64.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,211 @@ | ||||
| #!/bin/bash | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION=v2.21.5-1 | ||||
| CUDNN_VERSION=9.5.1.17 | ||||
|  | ||||
| function install_cusparselt_062 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     # Fetch the linux-sbsa cuSparseLt 0.6.2.3 redist archive into a scratch | ||||
|     # directory, copy its headers and libraries under /usr/local/cuda, then | ||||
|     # delete the scratch directory. | ||||
|     local tarball="libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz" | ||||
|     # The archive unpacks into a directory named like the tarball minus ".tar.xz". | ||||
|     local unpacked="${tarball%.tar.xz}" | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz | ||||
|     tar xf "${tarball}" | ||||
|     cp -a "${unpacked}"/include/* /usr/local/cuda/include/ | ||||
|     cp -a "${unpacked}"/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_cusparselt_063 { | ||||
|     # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
|     # Fetch the linux-sbsa cuSparseLt 0.6.3.2 redist archive into a scratch | ||||
|     # directory, copy its headers and libraries under /usr/local/cuda, then | ||||
|     # delete the scratch directory. | ||||
|     local tarball="libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz" | ||||
|     # The archive unpacks into a directory named like the tarball minus ".tar.xz". | ||||
|     local unpacked="${tarball%.tar.xz}" | ||||
|     mkdir tmp_cusparselt && pushd tmp_cusparselt | ||||
|     wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz | ||||
|     tar xf "${tarball}" | ||||
|     cp -a "${unpacked}"/include/* /usr/local/cuda/include/ | ||||
|     cp -a "${unpacked}"/lib/* /usr/local/cuda/lib64/ | ||||
|     popd | ||||
|     rm -rf tmp_cusparselt | ||||
| } | ||||
|  | ||||
| function install_124 { | ||||
|   # Install the CUDA 12.4.1 toolkit, cuDNN, NCCL and cuSparseLt, and make | ||||
|   # /usr/local/cuda a symlink to /usr/local/cuda-12.4. | ||||
|   # | ||||
|   # `local` keeps this cuDNN pin from overwriting the script-level | ||||
|   # CUDNN_VERSION, which install_126 reads when several CUDA versions are | ||||
|   # passed on one command line. | ||||
|   local CUDNN_VERSION=9.1.0.70 | ||||
|   echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" | ||||
|   rm -rf /usr/local/cuda-12.4 /usr/local/cuda | ||||
|   # install CUDA 12.4.1 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   ./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.4.1_550.54.15_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   # Install the cuSparseLt release announced in the banner above (0.6.2) | ||||
|   install_cusparselt_062 | ||||
|  | ||||
|   # Refresh the dynamic linker cache so the newly copied libraries are found | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_124 { | ||||
|   # Shrink the CUDA 12.4 install: run nvprune over the static libraries with | ||||
|   # the gencode lists below, then delete the bundled visual tools. | ||||
|   # OVERRIDE_GENCODE / OVERRIDE_GENCODE_CUDNN, when non-empty, replace the | ||||
|   # default architecture lists (same hooks as prune_126). | ||||
|   echo "Pruning CUDA 12.4" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   # Mirror prune_126: allow the cuBLAS/cuDNN arch list to be overridden too | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.4/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||||
| } | ||||
|  | ||||
| function install_126 { | ||||
|   # Install the CUDA 12.6.3 toolkit, cuDNN, NCCL and cuSparseLt, and make | ||||
|   # /usr/local/cuda a symlink to /usr/local/cuda-12.6. | ||||
|   # Reads the script-level CUDNN_VERSION and NCCL_VERSION variables. | ||||
|   echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   # Remove any previous 12.6 install and the generic symlink before installing | ||||
|   rm -rf /usr/local/cuda-12.6 /usr/local/cuda | ||||
|   # install CUDA 12.6.3 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   ./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.6.3_560.35.05_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   # Download the cuDNN archive and copy its headers/libs into the toolkit | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   # Build NCCL from the pinned tag and copy the result into the toolkit | ||||
|   git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   # Refresh the dynamic linker cache after copying new libraries | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_126 { | ||||
|   # Shrink the CUDA 12.6 install: run nvprune over the static libraries with | ||||
|   # the gencode lists below, then delete the bundled visual tools. | ||||
|   # OVERRIDE_GENCODE / OVERRIDE_GENCODE_CUDNN, when non-empty, replace the | ||||
|   # default architecture lists. | ||||
|   echo "Pruning CUDA 12.6" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" | ||||
|  | ||||
|   # Default architecture lists; GENCODE_CUDNN additionally includes sm_61 | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   # (culibos, cudart and metis are filtered out as well); each file is | ||||
|   # pruned in place because input and output paths are the same | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   # (only the two cuBLAS static archives are passed to nvprune here) | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.6/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| function install_128 { | ||||
|   # Install the CUDA 12.8.0 toolkit, cuDNN, NCCL and cuSparseLt, and make | ||||
|   # /usr/local/cuda a symlink to /usr/local/cuda-12.8. | ||||
|   # | ||||
|   # `local` keeps this cuDNN pin from overwriting the script-level | ||||
|   # CUDNN_VERSION, which install_126 reads when several CUDA versions are | ||||
|   # passed on one command line. | ||||
|   local CUDNN_VERSION=9.7.1.26 | ||||
|   echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3" | ||||
|   rm -rf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|   # install CUDA 12.8.0 in the same container | ||||
|   wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   chmod +x cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   ./cuda_12.8.0_570.86.10_linux_sbsa.run --toolkit --silent | ||||
|   rm -f cuda_12.8.0_570.86.10_linux_sbsa.run | ||||
|   rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   mkdir tmp_cudnn && cd tmp_cudnn | ||||
|   wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ | ||||
|   cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf tmp_cudnn | ||||
|  | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   cd nccl && make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   cd .. | ||||
|   rm -rf nccl | ||||
|  | ||||
|   install_cusparselt_063 | ||||
|  | ||||
|   # Refresh the dynamic linker cache so the newly copied libraries are found | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| # Handle each requested CUDA version in the order given on the command line; | ||||
| # any unrecognised argument aborts the script. | ||||
| for cuda_version in "$@" | ||||
| do | ||||
|     case "${cuda_version}" in | ||||
|     12.4) | ||||
|         install_124 | ||||
|         prune_124 | ||||
|         ;; | ||||
|     12.6) | ||||
|         install_126 | ||||
|         prune_126 | ||||
|         ;; | ||||
|     12.8) | ||||
|         # no prune step is run for 12.8 | ||||
|         install_128 | ||||
|         ;; | ||||
|     *) | ||||
|         echo "bad argument ${cuda_version}" | ||||
|         exit 1 | ||||
|         ;; | ||||
|     esac | ||||
| done | ||||
| @ -5,7 +5,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then | ||||
|     mkdir tmp_cudnn | ||||
|     pushd tmp_cudnn | ||||
|     if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.8.0.87_cuda12-archive" | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then | ||||
|         CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive" | ||||
|     elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|  | ||||
							
								
								
									
										38
									
								
								.ci/docker/common/install_db.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										38
									
								
								.ci/docker/common/install_db.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,38 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Ubuntu path: refresh the apt package index, then clean apt caches and | ||||
| # temporary directories. No packages are installed here. | ||||
| install_ubuntu() { | ||||
|   apt-get update | ||||
|  | ||||
|   # Cleanup | ||||
|   apt-get autoclean && apt-get clean | ||||
|   # Remove the package lists fetched above along with temp files | ||||
|   rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* | ||||
| } | ||||
|  | ||||
| # CentOS path: enable EPEL, then drop yum caches and metadata. | ||||
| install_centos() { | ||||
|   # EPEL carries many of the packages we depend on. | ||||
|   # See http://fedoraproject.org/wiki/EPEL | ||||
|   yum --enablerepo=extras install -y epel-release | ||||
|  | ||||
|   # Cleanup: yum caches, yumdb and transaction history | ||||
|   yum clean all | ||||
|   rm -rf /var/cache/yum /var/lib/yum/yumdb /var/lib/yum/history | ||||
| } | ||||
|  | ||||
| # Pick the installer that matches the ID field of /etc/os-release | ||||
| # (surrounding double quotes are stripped from the value) | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| if [ "$ID" = "ubuntu" ]; then | ||||
|   install_ubuntu | ||||
| elif [ "$ID" = "centos" ]; then | ||||
|   install_centos | ||||
| else | ||||
|   # Any other distribution is unsupported | ||||
|   echo "Unable to determine OS..." | ||||
|   exit 1 | ||||
| fi | ||||
| @ -13,7 +13,7 @@ clone_executorch() { | ||||
|   # and fetch the target commit | ||||
|   pushd executorch | ||||
|   git checkout "${EXECUTORCH_PINNED_COMMIT}" | ||||
|   git submodule update --init --recursive | ||||
|   git submodule update --init | ||||
|   popd | ||||
|  | ||||
|   chown -R jenkins executorch | ||||
| @ -50,9 +50,10 @@ setup_executorch() { | ||||
|   pushd executorch | ||||
|  | ||||
|   export PYTHON_EXECUTABLE=python | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|   export EXECUTORCH_BUILD_PYBIND=ON | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|  | ||||
|   as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true | ||||
|   as_jenkins .ci/scripts/setup-linux.sh cmake || true | ||||
|   popd | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -17,7 +17,7 @@ if [ -n "${UBUNTU_VERSION}" ];then | ||||
|                   libopenblas-dev libeigen3-dev libatlas-base-dev libzstd-dev | ||||
| fi | ||||
|  | ||||
| pip_install numpy scipy imageio cmake ninja | ||||
| conda_install numpy scipy imageio cmake ninja | ||||
|  | ||||
| git clone --depth 1 --branch release/16.x --recursive https://github.com/llvm/llvm-project.git | ||||
| cmake -DCMAKE_BUILD_TYPE=Release \ | ||||
| @ -35,9 +35,7 @@ git clone https://github.com/halide/Halide.git | ||||
| pushd Halide | ||||
| git checkout ${COMMIT} && git submodule update --init --recursive | ||||
| pip_install -r requirements.txt | ||||
| # NOTE: pybind has a requirement for cmake > 3.5 so set the minimum cmake version here with a flag | ||||
| #       Context: https://github.com/pytorch/pytorch/issues/150420 | ||||
| cmake -G Ninja -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release -S . -B build | ||||
| cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build | ||||
| cmake --build build | ||||
| test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3 | ||||
| cmake --install build --prefix ${CONDA_PREFIX} | ||||
|  | ||||
| @ -14,9 +14,16 @@ function install_timm() { | ||||
|   local commit | ||||
|   commit=$(get_pinned_commit timm) | ||||
|  | ||||
|   # TODO (huydhn): There is no torchvision release on 3.13 when I write this, so | ||||
|   # I'm using nightly here instead. We just need the package to be able to install | ||||
|   # TIMM. Remove this once vision has a release on 3.13 | ||||
|   if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then | ||||
|     pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124 | ||||
|   fi | ||||
|  | ||||
|   pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}" | ||||
|   # Clean up | ||||
|   conda_run pip uninstall -y torch torchvision triton | ||||
|   conda_run pip uninstall -y cmake torch torchvision triton | ||||
| } | ||||
|  | ||||
| # Pango is needed for weasyprint which is needed for doctr | ||||
|  | ||||
| @ -2,6 +2,8 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| if [ -n "${UBUNTU_VERSION}" ]; then | ||||
|   apt update | ||||
|   apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5 | ||||
| @ -13,8 +15,8 @@ chown -R jenkins pytorch | ||||
|  | ||||
| pushd pytorch | ||||
| # Install all linter dependencies | ||||
| pip install -r requirements.txt | ||||
| lintrunner init | ||||
| pip_install -r requirements.txt | ||||
| conda_run lintrunner init | ||||
|  | ||||
| # Cache .lintbin directory as part of the Docker image | ||||
| cp -r .lintbin /tmp | ||||
|  | ||||
| @ -1,23 +1,26 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script that installs magma from tarball inside conda environment. | ||||
| # It replaces anaconda magma-cuda package which is no longer published. | ||||
| # Execute it inside active conda environment. | ||||
| # See issue: https://github.com/pytorch/pytorch/issues/138506 | ||||
| # Script that replaces the magma install from a conda package | ||||
|  | ||||
| set -eou pipefail | ||||
|  | ||||
| cuda_version_nodot=${1/./} | ||||
| anaconda_dir=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} | ||||
| function do_install() { | ||||
|     cuda_version_nodot=${1/./} | ||||
|     anaconda_python_version=$2 | ||||
|  | ||||
| MAGMA_VERSION="2.6.1" | ||||
| magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
| ( | ||||
|     set -x | ||||
|     tmp_dir=$(mktemp -d) | ||||
|     pushd ${tmp_dir} | ||||
|     curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} | ||||
|     tar -xvf "${magma_archive}" | ||||
|     mv include/* "${anaconda_dir}/include/" | ||||
|     mv lib/* "${anaconda_dir}/lib" | ||||
|     popd | ||||
| ) | ||||
|     MAGMA_VERSION="2.6.1" | ||||
|     magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
|  | ||||
|     anaconda_dir="/opt/conda/envs/py_${anaconda_python_version}" | ||||
|     ( | ||||
|         set -x | ||||
|         tmp_dir=$(mktemp -d) | ||||
|         pushd ${tmp_dir} | ||||
|         curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} | ||||
|         tar -xvf "${magma_archive}" | ||||
|         mv include/* "${anaconda_dir}/include/" | ||||
|         mv lib/* "${anaconda_dir}/lib" | ||||
|         popd | ||||
|     ) | ||||
| } | ||||
|  | ||||
| do_install $1 $2 | ||||
|  | ||||
| @ -1,26 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| NCCL_VERSION="" | ||||
| if [[ ${CUDA_VERSION:0:2} == "11" ]]; then | ||||
|   NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt) | ||||
| elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|   NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt) | ||||
| else | ||||
|   echo "Unexpected CUDA_VERSION ${CUDA_VERSION}" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| if [[ -n "${NCCL_VERSION}" ]]; then | ||||
|   # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses | ||||
|   # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build | ||||
|   git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git | ||||
|   pushd nccl | ||||
|   make -j src.build | ||||
|   cp -a build/include/* /usr/local/cuda/include/ | ||||
|   cp -a build/lib/* /usr/local/cuda/lib64/ | ||||
|   popd | ||||
|   rm -rf nccl | ||||
|   ldconfig | ||||
| fi | ||||
| @ -31,7 +31,8 @@ pip_install \ | ||||
| pip_install coloredlogs packaging | ||||
|  | ||||
| pip_install onnxruntime==1.18.1 | ||||
| pip_install onnxscript==0.2.6 --no-deps | ||||
| pip_install onnx==1.17.0 | ||||
| pip_install onnxscript==0.2.2 --no-deps | ||||
| # required by onnxscript | ||||
| pip_install ml_dtypes | ||||
|  | ||||
|  | ||||
							
								
								
									
										19
									
								
								.ci/docker/common/install_protobuf.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										19
									
								
								.ci/docker/common/install_protobuf.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,19 @@ | ||||
| #!/bin/bash | ||||
| # Build protobuf 3.17.3 from the release tarball and install it system-wide. | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| pb_dir="/usr/temp_pb_install_dir" | ||||
| mkdir -p $pb_dir | ||||
|  | ||||
| # On the nvidia/cuda:9-cudnn7-devel-centos7 image we need this symlink or | ||||
| # else it will fail with | ||||
| #   g++: error: ./../lib64/crti.o: No such file or directory | ||||
| ln -s /usr/lib64 "$pb_dir/lib64" | ||||
|  | ||||
| curl -LO "https://github.com/protocolbuffers/protobuf/releases/download/v3.17.3/protobuf-all-3.17.3.tar.gz" --retry 3 | ||||
|  | ||||
| tar -xvz --no-same-owner -C "$pb_dir" --strip-components 1 -f protobuf-all-3.17.3.tar.gz | ||||
|  | ||||
| # Leave two cores free, but never go below one job: `make -j0` is an error | ||||
| # and a negative value is invalid on machines with one or two cores. | ||||
| NPROC=$(( $(nproc) - 2 )) | ||||
| if [ "${NPROC}" -lt 1 ]; then | ||||
|   NPROC=1 | ||||
| fi | ||||
|  | ||||
| # Each step is its own statement so that `set -e` aborts on the first failure; | ||||
| # inside a single `a && b && c` list a failing non-final command would not | ||||
| # stop the script and the build error would be silently ignored. | ||||
| pushd "$pb_dir" | ||||
| ./configure | ||||
| make -j${NPROC} | ||||
| make -j${NPROC} check | ||||
| # Previously spelled ${NRPOC}, which expanded to nothing and ran an | ||||
| # unbounded `make -j install` | ||||
| sudo make -j${NPROC} install | ||||
| sudo ldconfig | ||||
| popd | ||||
| rm -rf $pb_dir | ||||
| @ -1,15 +0,0 @@ | ||||
| #!/bin/bash | ||||
| set -ex | ||||
|  | ||||
| apt-get update | ||||
| # Use deadsnakes in case we need an older python version | ||||
| sudo add-apt-repository ppa:deadsnakes/ppa | ||||
| apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv | ||||
|  | ||||
| # Use a venv because uv and some other package managers don't support --user install | ||||
| ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python | ||||
| python -m venv /var/lib/jenkins/ci_env | ||||
| source /var/lib/jenkins/ci_env/bin/activate | ||||
|  | ||||
| python -mpip install --upgrade pip | ||||
| python -mpip install -r /opt/requirements-ci.txt | ||||
| @ -8,6 +8,10 @@ ver() { | ||||
|  | ||||
| install_ubuntu() { | ||||
|     apt-get update | ||||
|     if [[ $UBUNTU_VERSION == 18.04 ]]; then | ||||
|       # gpg-agent is not available by default on 18.04 | ||||
|       apt-get install -y --no-install-recommends gpg-agent | ||||
|     fi | ||||
|     if [[ $UBUNTU_VERSION == 20.04 ]]; then | ||||
|       # gpg-agent is not available by default on 20.04 | ||||
|       apt-get install -y --no-install-recommends gpg-agent | ||||
| @ -19,13 +23,6 @@ install_ubuntu() { | ||||
|     apt-get install -y libc++1 | ||||
|     apt-get install -y libc++abi1 | ||||
|  | ||||
|     # Make sure rocm packages from repo.radeon.com have highest priority | ||||
|     cat << EOF > /etc/apt/preferences.d/rocm-pin-600 | ||||
| Package: * | ||||
| Pin: release o=repo.radeon.com | ||||
| Pin-Priority: 600 | ||||
| EOF | ||||
|  | ||||
|     # Add amdgpu repository | ||||
|     UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'` | ||||
|     echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list | ||||
| @ -66,25 +63,17 @@ EOF | ||||
|     done | ||||
|  | ||||
|     # ROCm 6.3 had a regression where initializing static code objects had significant overhead | ||||
|     # ROCm 6.4 did not yet fix the regression, also HIP branch names are different | ||||
|     if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]] || [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then | ||||
|         if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then | ||||
|             HIP_BRANCH=rocm-6.3.x | ||||
|             VER_STR=6.3 | ||||
|         elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then | ||||
|             HIP_BRANCH=release/rocm-rel-6.4 | ||||
|             VER_STR=6.4 | ||||
|         fi | ||||
|     if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then | ||||
|         # clr build needs CppHeaderParser but can only find it using conda's python | ||||
|         /opt/conda/bin/python -m pip install CppHeaderParser | ||||
|         git clone https://github.com/ROCm/HIP -b $HIP_BRANCH | ||||
|         git clone https://github.com/ROCm/HIP -b rocm-6.3.x | ||||
|         HIP_COMMON_DIR=$(readlink -f HIP) | ||||
|         git clone https://github.com/jeffdaily/clr -b release/rocm-rel-${VER_STR}-statco-hotfix | ||||
|         git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix | ||||
|         mkdir -p clr/build | ||||
|         pushd clr/build | ||||
|         cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR | ||||
|         make -j | ||||
|         cp hipamd/lib/libamdhip64.so.${VER_STR}.* /opt/rocm/lib/libamdhip64.so.${VER_STR}.* | ||||
|         cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.* | ||||
|         popd | ||||
|         rm -rf HIP clr | ||||
|     fi | ||||
|  | ||||
| @ -1,32 +1,50 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script used only in CD pipeline | ||||
| #!/bin/bash | ||||
| # Script used in CI and CD pipeline | ||||
|  | ||||
| set -eou pipefail | ||||
| set -ex | ||||
|  | ||||
| function do_install() { | ||||
|     rocm_version=$1 | ||||
|     rocm_version_nodot=${1//./} | ||||
| # Magma build scripts need `python` | ||||
| ln -sf /usr/bin/python3 /usr/bin/python | ||||
|  | ||||
|     # Version 2.7.2 + ROCm related updates | ||||
|     MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|     magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2" | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|   almalinux) | ||||
|     yum install -y gcc-gfortran | ||||
|     ;; | ||||
|   *) | ||||
|     echo "No preinstalls to build magma..." | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
|     rocm_dir="/opt/rocm" | ||||
|     ( | ||||
|         set -x | ||||
|         tmp_dir=$(mktemp -d) | ||||
|         pushd ${tmp_dir} | ||||
|         curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive} | ||||
|         if tar -xvf "${magma_archive}" | ||||
|         then | ||||
|             mkdir -p "${rocm_dir}/magma" | ||||
|             mv include "${rocm_dir}/magma/include" | ||||
|             mv lib "${rocm_dir}/magma/lib" | ||||
|         else | ||||
|             echo "${magma_archive} not found, skipping magma install" | ||||
|         fi | ||||
|         popd | ||||
|     ) | ||||
| } | ||||
| MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION} | ||||
|  | ||||
| do_install $1 | ||||
| # "install" hipMAGMA into /opt/rocm/magma by copying after build | ||||
| git clone https://bitbucket.org/icl/magma.git | ||||
| pushd magma | ||||
|  | ||||
| # Version 2.7.2 + ROCm related updates | ||||
| git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|  | ||||
| cp make.inc-examples/make.inc.hip-gcc-mkl make.inc | ||||
| echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc | ||||
| if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then | ||||
|     echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc | ||||
| fi | ||||
| echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc | ||||
| echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc | ||||
| export PATH="${PATH}:/opt/rocm/bin" | ||||
| if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then | ||||
|   amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'` | ||||
| else | ||||
|   amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs` | ||||
| fi | ||||
| for arch in $amdgpu_targets; do | ||||
|   echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc | ||||
| done | ||||
| # hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition | ||||
| sed -i 's/^FOPENMP/#FOPENMP/g' make.inc | ||||
| make -f make.gen.hipMAGMA -j $(nproc) | ||||
| LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}" | ||||
| make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}" | ||||
| popd | ||||
| mv magma /opt/rocm | ||||
|  | ||||
							
								
								
									
										24
									
								
								.ci/docker/common/install_swiftshader.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										24
									
								
								.ci/docker/common/install_swiftshader.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,24 @@ | ||||
| #!/bin/bash | ||||
| # Download the prebuilt SwiftShader tarball and unpack it under | ||||
| # /var/lib/jenkins/swiftshader. | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| # Guard: with `set -e`, the script stops here unless SWIFTSHADER is non-empty | ||||
| [ -n "${SWIFTSHADER}" ] | ||||
|  | ||||
| # Run a command, retrying up to four more times with growing delays. | ||||
| # "$@" (not $*) preserves each argument exactly, including embedded spaces. | ||||
| retry () { | ||||
|     "$@"  || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@") | ||||
| } | ||||
|  | ||||
| _https_amazon_aws=https://ossci-android.s3.amazonaws.com | ||||
|  | ||||
| # SwiftShader | ||||
| _swiftshader_dir=/var/lib/jenkins/swiftshader | ||||
| _swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz | ||||
| mkdir -p $_swiftshader_dir | ||||
| _tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}" | ||||
|  | ||||
| # --fail makes curl exit non-zero on HTTP errors instead of saving an error page | ||||
| curl --silent --show-error --location --fail --retry 3 \ | ||||
|   --output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}" | ||||
|  | ||||
| tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}" | ||||
|  | ||||
| # NOTE(review): this export only reaches the caller if the script is sourced; | ||||
| # confirm how the Dockerfile consumes VK_ICD_FILENAMES. | ||||
| export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json" | ||||
| @ -2,16 +2,14 @@ | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| mkdir -p /opt/triton | ||||
| if [ -z "${TRITON}" ] && [ -z "${TRITON_CPU}" ]; then | ||||
|   echo "TRITON and TRITON_CPU are not set. Exiting..." | ||||
|   exit 0 | ||||
| fi | ||||
|  | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| get_pip_version() { | ||||
|   conda_run pip list | grep -w $* | head -n 1 | awk '{print $2}' | ||||
| get_conda_version() { | ||||
|   as_jenkins conda list -n py_$ANACONDA_PYTHON_VERSION | grep -w $* | head -n 1 | awk '{print $2}' | ||||
| } | ||||
|  | ||||
| conda_reinstall() { | ||||
|   as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y --force-reinstall $* | ||||
| } | ||||
|  | ||||
| if [ -n "${XPU_VERSION}" ]; then | ||||
| @ -33,9 +31,11 @@ if [ -n "${UBUNTU_VERSION}" ];then | ||||
|     apt-get install -y gpg-agent | ||||
| fi | ||||
|  | ||||
| # Keep the current cmake and numpy version here, so we can reinstall them later | ||||
| CMAKE_VERSION=$(get_pip_version cmake) | ||||
| NUMPY_VERSION=$(get_pip_version numpy) | ||||
| if [ -n "${CONDA_CMAKE}" ]; then | ||||
|   # Keep the current cmake and numpy version here, so we can reinstall them later | ||||
|   CMAKE_VERSION=$(get_conda_version cmake) | ||||
|   NUMPY_VERSION=$(get_conda_version numpy) | ||||
| fi | ||||
|  | ||||
| if [ -z "${MAX_JOBS}" ]; then | ||||
|     export MAX_JOBS=$(nproc) | ||||
| @ -52,7 +52,6 @@ cd triton | ||||
| as_jenkins git checkout ${TRITON_PINNED_COMMIT} | ||||
| as_jenkins git submodule update --init --recursive | ||||
| cd python | ||||
| pip_install pybind11==2.13.6 | ||||
|  | ||||
| # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527 | ||||
| as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py | ||||
| @ -61,35 +60,28 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" | ||||
|   # Triton needs at least gcc-9 to build | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
|   CXX=g++-9 pip_install . | ||||
| elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then | ||||
|   # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain | ||||
|   add-apt-repository -y ppa:ubuntu-toolchain-r/test | ||||
|   apt-get install -y g++-9 | ||||
|  | ||||
|   CXX=g++-9 conda_run python setup.py bdist_wheel | ||||
|   CXX=g++-9 pip_install . | ||||
| else | ||||
|   conda_run python setup.py bdist_wheel | ||||
|   pip_install . | ||||
| fi | ||||
|  | ||||
| # Copy the wheel to /opt for multi stage docker builds | ||||
| cp dist/*.whl /opt/triton | ||||
| # Install the wheel for docker builds that don't use multi stage | ||||
| pip_install dist/*.whl | ||||
|  | ||||
| # TODO: This is to make sure that the same cmake and numpy version from install conda | ||||
| # script is used. Without this step, the newer cmake version (3.25.2) downloaded by | ||||
| # triton build step via pip will fail to detect conda MKL. Once that issue is fixed, | ||||
| # this can be removed. | ||||
| # | ||||
| # The correct numpy version also needs to be set here because conda claims that it | ||||
| # causes inconsistent environment.  Without this, conda will attempt to install the | ||||
| # latest numpy version, which fails ASAN tests with the following import error: Numba | ||||
| # needs NumPy 1.20 or less. | ||||
| # Note that we install numpy with pip as conda might not have the version we want | ||||
| if [ -n "${CMAKE_VERSION}" ]; then | ||||
|   pip_install "cmake==${CMAKE_VERSION}" | ||||
| fi | ||||
| if [ -n "${NUMPY_VERSION}" ]; then | ||||
|   pip_install "numpy==${NUMPY_VERSION}" | ||||
| if [ -n "${CONDA_CMAKE}" ]; then | ||||
|   # TODO: This is to make sure that the same cmake and numpy version from install conda | ||||
|   # script is used. Without this step, the newer cmake version (3.25.2) downloaded by | ||||
|   # triton build step via pip will fail to detect conda MKL. Once that issue is fixed, | ||||
|   # this can be removed. | ||||
|   # | ||||
|   # The correct numpy version also needs to be set here because conda claims that it | ||||
|   # causes inconsistent environment.  Without this, conda will attempt to install the | ||||
|   # latest numpy version, which fails ASAN tests with the following import error: Numba | ||||
|   # needs NumPy 1.20 or less. | ||||
|   conda_reinstall cmake="${CMAKE_VERSION}" | ||||
|   # Note that we install numpy with pip as conda might not have the version we want | ||||
|   pip_install --force-reinstall numpy=="${NUMPY_VERSION}" | ||||
| fi | ||||
|  | ||||
							
								
								
									
										24
									
								
								.ci/docker/common/install_vulkan_sdk.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										24
									
								
								.ci/docker/common/install_vulkan_sdk.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,24 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| set -ex | ||||
|  | ||||
| [ -n "${VULKAN_SDK_VERSION}" ] | ||||
|  | ||||
| retry () { | ||||
|     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) | ||||
| } | ||||
|  | ||||
| _vulkansdk_dir=/var/lib/jenkins/vulkansdk | ||||
| _tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz | ||||
|  | ||||
| curl \ | ||||
|   --silent \ | ||||
|   --show-error \ | ||||
|   --location \ | ||||
|   --fail \ | ||||
|   --retry 3 \ | ||||
|   --output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz" | ||||
|  | ||||
| mkdir -p "${_vulkansdk_dir}" | ||||
| tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1 | ||||
| rm -rf "${_tmp_vulkansdk_targz}" | ||||
| @ -26,7 +26,7 @@ function install_ubuntu() { | ||||
|     wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \ | ||||
|         | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg.gpg | ||||
|     echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg.gpg] \ | ||||
|         https://apt.repos.intel.com/oneapi all main" \ | ||||
|         https://apt.repos.intel.com/${XPU_REPO_NAME} all main" \ | ||||
|         | tee /etc/apt/sources.list.d/oneAPI.list | ||||
|  | ||||
|     # Update the packages list and repository index | ||||
| @ -74,7 +74,7 @@ function install_rhel() { | ||||
|     tee > /etc/yum.repos.d/oneAPI.repo << EOF | ||||
| [oneAPI] | ||||
| name=Intel for Pytorch GPU dev repository | ||||
| baseurl=https://yum.repos.intel.com/oneapi | ||||
| baseurl=https://yum.repos.intel.com/${XPU_REPO_NAME} | ||||
| enabled=1 | ||||
| gpgcheck=1 | ||||
| repo_gpgcheck=1 | ||||
| @ -118,7 +118,7 @@ function install_sles() { | ||||
|         https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo | ||||
|     rpm --import https://repositories.intel.com/gpu/intel-graphics.key | ||||
|     # To add the online network package repository for the Intel Support Packages | ||||
|     zypper addrepo https://yum.repos.intel.com/oneapi oneAPI | ||||
|     zypper addrepo https://yum.repos.intel.com/${XPU_REPO_NAME} oneAPI | ||||
|     rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
|  | ||||
|     # The xpu-smi packages | ||||
| @ -141,10 +141,10 @@ if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then | ||||
|     XPU_DRIVER_VERSION="" | ||||
| fi | ||||
|  | ||||
| # Default use Intel® oneAPI Deep Learning Essentials 2025.0 | ||||
| if [[ "$XPU_VERSION" == "2025.1" ]]; then | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.1" | ||||
| else | ||||
| XPU_REPO_NAME="intel-for-pytorch-gpu-dev" | ||||
| XPU_PACKAGES="intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9" | ||||
| if [[ "$XPU_VERSION" == "2025.0" ]]; then | ||||
|     XPU_REPO_NAME="oneapi" | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.0" | ||||
| fi | ||||
|  | ||||
|  | ||||
| @ -49,9 +49,6 @@ RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
| FROM cpu as cuda | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| ADD ./common/install_magma.sh install_magma.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| ENV CUDA_HOME /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda11.8 | ||||
| @ -75,7 +72,6 @@ RUN bash ./install_magma.sh 12.8 | ||||
| RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda | ||||
|  | ||||
| FROM cpu as rocm | ||||
| ARG ROCM_VERSION | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| ENV MKLROOT /opt/intel | ||||
| @ -90,11 +86,11 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| # gfortran and python needed for building magma from source for ROCm | ||||
| RUN apt-get update -y && \ | ||||
|     apt-get install gfortran -y && \ | ||||
|     apt-get install python3 python-is-python3 -y && \ | ||||
|     apt-get install python -y && \ | ||||
|     apt-get clean | ||||
|  | ||||
| RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
|  | ||||
| FROM ${BASE_TARGET} as final | ||||
| COPY --from=openssl            /opt/openssl           /opt/openssl | ||||
|  | ||||
| @ -1,63 +1,83 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| set -eoux pipefail | ||||
| set -eou pipefail | ||||
|  | ||||
| image="$1" | ||||
| shift | ||||
|  | ||||
| if [ -z "${image}" ]; then | ||||
|   echo "Usage: $0 IMAGENAME:ARCHTAG" | ||||
|   echo "Usage: $0 IMAGE" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| DOCKER_IMAGE="pytorch/${image}" | ||||
|  | ||||
| TOPDIR=$(git rev-parse --show-toplevel) | ||||
|  | ||||
| GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu} | ||||
| GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} | ||||
|  | ||||
| WITH_PUSH=${WITH_PUSH:-} | ||||
|  | ||||
| DOCKER=${DOCKER:-docker} | ||||
|  | ||||
| # Go from imagename:tag to tag | ||||
| DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}') | ||||
|  | ||||
| GPU_ARCH_VERSION="" | ||||
| if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then | ||||
|     # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8 | ||||
|     GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}') | ||||
| elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then | ||||
|     # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4 | ||||
|     GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}') | ||||
| fi | ||||
|  | ||||
| case ${DOCKER_TAG_PREFIX} in | ||||
| case ${GPU_ARCH_TYPE} in | ||||
|     cpu) | ||||
|         BASE_TARGET=cpu | ||||
|         DOCKER_TAG=cpu | ||||
|         GPU_IMAGE=ubuntu:20.04 | ||||
|         DOCKER_GPU_BUILD_ARG="" | ||||
|         ;; | ||||
|     cuda*) | ||||
|     cuda) | ||||
|         BASE_TARGET=cuda${GPU_ARCH_VERSION} | ||||
|         DOCKER_TAG=cuda${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=ubuntu:20.04 | ||||
|         DOCKER_GPU_BUILD_ARG="" | ||||
|         ;; | ||||
|     rocm*) | ||||
|     rocm) | ||||
|         BASE_TARGET=rocm | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete | ||||
|         DOCKER_TAG=rocm${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}" | ||||
|         echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') | ||||
|  | ||||
| DOCKER_BUILDKIT=1 ${DOCKER} build \ | ||||
|     --target final \ | ||||
|     ${DOCKER_GPU_BUILD_ARG} \ | ||||
|     --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|     --build-arg "BASE_TARGET=${BASE_TARGET}" \ | ||||
|     -t "${tmp_tag}" \ | ||||
|     $@ \ | ||||
|     -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \ | ||||
|     "${TOPDIR}/.ci/docker/" | ||||
| ( | ||||
|     set -x | ||||
|     DOCKER_BUILDKIT=1 ${DOCKER} build \ | ||||
|          --target final \ | ||||
|         ${DOCKER_GPU_BUILD_ARG} \ | ||||
|         --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|         --build-arg "BASE_TARGET=${BASE_TARGET}" \ | ||||
|         -t "${DOCKER_IMAGE}" \ | ||||
|         $@ \ | ||||
|         -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \ | ||||
|         "${TOPDIR}/.ci/docker/" | ||||
|  | ||||
| ) | ||||
|  | ||||
| GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)} | ||||
| GIT_BRANCH_NAME=${GITHUB_REF##*/} | ||||
| GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)} | ||||
| DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME} | ||||
| DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA} | ||||
|  | ||||
| if [[ "${WITH_PUSH}" == true ]]; then | ||||
|   ( | ||||
|     set -x | ||||
|     ${DOCKER} push "${DOCKER_IMAGE}" | ||||
|     if [[ -n ${GITHUB_REF} ]]; then | ||||
|         ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG} | ||||
|         ${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG} | ||||
|         ${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}" | ||||
|         ${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}" | ||||
|     fi | ||||
|   ) | ||||
| fi | ||||
|  | ||||
| @ -18,31 +18,28 @@ COPY ./common/install_user.sh install_user.sh | ||||
| RUN bash ./install_user.sh && rm install_user.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG PYTHON_VERSION | ||||
| ARG PIP_CMAKE | ||||
| # Put venv into the env vars so users don't need to activate it | ||||
| ENV PATH /var/lib/jenkins/ci_env/bin:$PATH | ||||
| ENV VIRTUAL_ENV /var/lib/jenkins/ci_env | ||||
| COPY requirements-ci.txt /opt/requirements-ci.txt | ||||
| COPY ./common/install_python.sh install_python.sh | ||||
| RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ./common/install_magma_conda.sh install_magma_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
|  | ||||
| # Install cuda and cudnn | ||||
| ARG CUDA_VERSION | ||||
| COPY ./common/install_cuda.sh install_cuda.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh | ||||
| ENV DESIRED_CUDA ${CUDA_VERSION} | ||||
| ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH | ||||
|  | ||||
| # Note that Docker build forbids copying file outside the build context | ||||
| COPY ./common/install_linter.sh install_linter.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| RUN bash ./install_linter.sh | ||||
| RUN rm install_linter.sh | ||||
|  | ||||
| RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env | ||||
| RUN rm install_linter.sh common_utils.sh | ||||
|  | ||||
| USER jenkins | ||||
| CMD ["bash"] | ||||
|  | ||||
| @ -15,17 +15,20 @@ COPY ./common/install_user.sh install_user.sh | ||||
| RUN bash ./install_user.sh && rm install_user.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG PYTHON_VERSION | ||||
| ENV PATH /var/lib/jenkins/ci_env/bin:$PATH | ||||
| ENV VIRTUAL_ENV /var/lib/jenkins/ci_env | ||||
| COPY requirements-ci.txt /opt/requirements-ci.txt | ||||
| COPY ./common/install_python.sh install_python.sh | ||||
| RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt | ||||
|  | ||||
| # Note that Docker build forbids copying file outside the build context | ||||
| COPY ./common/install_linter.sh install_linter.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| RUN bash ./install_linter.sh | ||||
| RUN rm install_linter.sh | ||||
| RUN rm install_linter.sh common_utils.sh | ||||
|  | ||||
| USER jenkins | ||||
| CMD ["bash"] | ||||
|  | ||||
							
								
								
									
										200
									
								
								.ci/docker/manywheel/Dockerfile
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										200
									
								
								.ci/docker/manywheel/Dockerfile
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,200 @@ | ||||
| # syntax = docker/dockerfile:experimental | ||||
| ARG ROCM_VERSION=3.7 | ||||
| ARG BASE_CUDA_VERSION=11.8 | ||||
|  | ||||
| ARG GPU_IMAGE=centos:7 | ||||
| FROM centos:7 as base | ||||
|  | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| ARG DEVTOOLSET_VERSION=9 | ||||
|  | ||||
| # Note: This is a required patch since CentOS has reached EOL | ||||
| # otherwise any yum install step will fail | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel | ||||
| # Just add everything as a safe.directory for git since these will be used in multiple places with git | ||||
| RUN git config --global --add safe.directory '*' | ||||
| RUN yum install -y yum-utils centos-release-scl | ||||
| RUN yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| # Note: After running yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| # patch is required once again. Somehow this step adds mirror.centos.org | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils | ||||
| ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| RUN yum --enablerepo=extras install -y epel-release | ||||
|  | ||||
| # cmake-3.18.4 from pip | ||||
| RUN yum install -y python3-pip && \ | ||||
|     python3 -mpip install cmake==3.18.4 && \ | ||||
|     ln -s /usr/local/bin/cmake /usr/bin/cmake | ||||
|  | ||||
| RUN yum install -y autoconf aclocal automake make sudo | ||||
|  | ||||
| FROM base as openssl | ||||
| # Install openssl (this must precede `build python` step) | ||||
| # (In order to have a proper SSL module, Python is compiled | ||||
| # against a recent openssl [see env vars above], which is linked | ||||
| # statically. We delete openssl afterwards.) | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
|  | ||||
| # EPEL for cmake | ||||
| FROM base as patchelf | ||||
| # Install patchelf | ||||
| ADD ./common/install_patchelf.sh install_patchelf.sh | ||||
| RUN bash ./install_patchelf.sh && rm install_patchelf.sh | ||||
| RUN cp $(which patchelf) /patchelf | ||||
|  | ||||
| FROM patchelf as python | ||||
| # build python | ||||
| COPY manywheel/build_scripts /build_scripts | ||||
| ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh | ||||
| RUN bash build_scripts/build.sh && rm -r build_scripts | ||||
|  | ||||
| FROM base as cuda | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| # Install CUDA | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh | ||||
|  | ||||
| FROM base as intel | ||||
| # MKL | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
|  | ||||
| FROM base as magma | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| # Install magma | ||||
| ADD ./common/install_magma.sh install_magma.sh | ||||
| RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh | ||||
|  | ||||
| FROM base as jni | ||||
| # Install java jni header | ||||
| ADD ./common/install_jni.sh install_jni.sh | ||||
| ADD ./java/jni.h jni.h | ||||
| RUN bash ./install_jni.sh && rm install_jni.sh | ||||
|  | ||||
| FROM base as libpng | ||||
| # Install libpng | ||||
| ADD ./common/install_libpng.sh install_libpng.sh | ||||
| RUN bash ./install_libpng.sh && rm install_libpng.sh | ||||
|  | ||||
| FROM ${GPU_IMAGE} as common | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| RUN yum install -y \ | ||||
|         aclocal \ | ||||
|         autoconf \ | ||||
|         automake \ | ||||
|         bison \ | ||||
|         bzip2 \ | ||||
|         curl \ | ||||
|         diffutils \ | ||||
|         file \ | ||||
|         git \ | ||||
|         make \ | ||||
|         patch \ | ||||
|         perl \ | ||||
|         unzip \ | ||||
|         util-linux \ | ||||
|         wget \ | ||||
|         which \ | ||||
|         xz \ | ||||
|         yasm | ||||
| RUN yum install -y \ | ||||
|     https://repo.ius.io/ius-release-el7.rpm \ | ||||
|     https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm | ||||
|  | ||||
| RUN yum swap -y git git236-core | ||||
| # git236+ would refuse to run git commands in repos owned by other users | ||||
| # Which causes version check to fail, as pytorch repo is bind-mounted into the image | ||||
| # Override this behaviour by treating every folder as safe | ||||
| # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 | ||||
| RUN git config --global --add safe.directory "*" | ||||
|  | ||||
| ENV SSL_CERT_FILE=/opt/_internal/certs.pem | ||||
| # Install LLVM version | ||||
| COPY --from=openssl            /opt/openssl                          /opt/openssl | ||||
| COPY --from=python             /opt/python                           /opt/python | ||||
| COPY --from=python             /opt/_internal                        /opt/_internal | ||||
| COPY --from=python             /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel | ||||
| COPY --from=intel              /opt/intel                            /opt/intel | ||||
| COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf | ||||
| COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h | ||||
| COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/ | ||||
| COPY --from=libpng             /usr/local/include/png*               /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/ | ||||
| COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/ | ||||
| COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig | ||||
|  | ||||
| FROM common as cpu_final | ||||
| ARG BASE_CUDA_VERSION=10.1 | ||||
| ARG DEVTOOLSET_VERSION=9 | ||||
| # Install Anaconda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| ENV PATH /opt/conda/bin:$PATH | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
|  | ||||
| RUN yum install -y yum-utils centos-release-scl | ||||
| RUN yum-config-manager --enable rhel-server-rhscl-7-rpms | ||||
| RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo | ||||
| RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo | ||||
| RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils | ||||
| ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| # cmake is already installed inside the rocm base image, so remove if present | ||||
| RUN rpm -e cmake || true | ||||
| # cmake-3.18.4 from pip | ||||
| RUN yum install -y python3-pip && \ | ||||
|     python3 -mpip install cmake==3.18.4 && \ | ||||
|     ln -s /usr/local/bin/cmake /usr/bin/cmake | ||||
|  | ||||
| # ninja | ||||
| RUN yum install -y ninja-build | ||||
|  | ||||
| FROM cpu_final as cuda_final | ||||
| RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION} | ||||
| RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda | ||||
| ENV PATH=/usr/local/cuda/bin:$PATH | ||||
|  | ||||
| FROM cpu_final as rocm_final | ||||
| ARG ROCM_VERSION=3.7 | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
| ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH} | ||||
| # Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0) | ||||
| # find HIP works for ROCm5.7. Not needed for ROCm6.0 and above. | ||||
| # Remove below when ROCm5.7 is not in support matrix anymore. | ||||
| ENV ROCM_PATH /opt/rocm | ||||
| ENV MKLROOT /opt/intel | ||||
| # No need to install ROCm as base docker image should have full ROCm install | ||||
| #ADD ./common/install_rocm.sh install_rocm.sh | ||||
| #RUN ROCM_VERSION=${ROCM_VERSION} bash ./install_rocm.sh && rm install_rocm.sh | ||||
| ADD ./common/install_rocm_drm.sh install_rocm_drm.sh | ||||
| RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh | ||||
| # cmake3 is needed for the MIOpen build | ||||
| RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3 | ||||
| ADD ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
| @ -7,8 +7,8 @@ ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
|  | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-gcc gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran gcc-toolset-${DEVTOOLSET_VERSION}-gdb | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-toolchain | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
| @ -33,13 +33,10 @@ RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 | ||||
| RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
|  | ||||
| FROM base as cuda | ||||
| ARG BASE_CUDA_VERSION=12.6 | ||||
| ARG BASE_CUDA_VERSION=11.8 | ||||
| # Install CUDA | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh | ||||
|  | ||||
| FROM base as intel | ||||
| # MKL | ||||
| @ -47,7 +44,7 @@ ADD ./common/install_mkl.sh install_mkl.sh | ||||
| RUN bash ./install_mkl.sh && rm install_mkl.sh | ||||
|  | ||||
| FROM base as magma | ||||
| ARG BASE_CUDA_VERSION=12.6 | ||||
| ARG BASE_CUDA_VERSION=10.2 | ||||
| # Install magma | ||||
| ADD ./common/install_magma.sh install_magma.sh | ||||
| RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh | ||||
| @ -64,7 +61,7 @@ ADD ./common/install_libpng.sh install_libpng.sh | ||||
| RUN bash ./install_libpng.sh && rm install_libpng.sh | ||||
|  | ||||
| FROM ${GPU_IMAGE} as common | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| ENV LC_ALL en_US.UTF-8 | ||||
| ENV LANG en_US.UTF-8 | ||||
| ENV LANGUAGE en_US.UTF-8 | ||||
| @ -87,12 +84,13 @@ RUN yum install -y \ | ||||
|         wget \ | ||||
|         which \ | ||||
|         xz \ | ||||
|         glibc-langpack-en \ | ||||
|         gcc-toolset-${DEVTOOLSET_VERSION}-gcc \ | ||||
|         gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \ | ||||
|         gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ | ||||
|         gcc-toolset-${DEVTOOLSET_VERSION}-gdb | ||||
|         gcc-toolset-${DEVTOOLSET_VERSION}-toolchain \ | ||||
|         glibc-langpack-en | ||||
| RUN yum install -y \ | ||||
|     https://repo.ius.io/ius-release-el7.rpm \ | ||||
|     https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm | ||||
|  | ||||
| RUN yum swap -y git git236-core | ||||
| # git236+ would refuse to run git commands in repos owned by other users | ||||
| # Which causes version check to fail, as pytorch repo is bind-mounted into the image | ||||
| # Override this behaviour by treating every folder as safe | ||||
| @ -116,8 +114,8 @@ COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/ | ||||
| COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h | ||||
|  | ||||
| FROM common as cpu_final | ||||
| ARG BASE_CUDA_VERSION=12.6 | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| ARG BASE_CUDA_VERSION=11.8 | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
| # Install Anaconda | ||||
| ADD ./common/install_conda_docker.sh install_conda.sh | ||||
| RUN bash ./install_conda.sh && rm install_conda.sh | ||||
| @ -156,14 +154,11 @@ ENV ROCM_PATH /opt/rocm | ||||
| # and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker | ||||
| RUN python3 -m pip install --upgrade pip && \ | ||||
|     python3 -mpip install cmake==3.28.4 | ||||
| # replace the libdrm in /opt/amdgpu with custom amdgpu.ids lookup path | ||||
| ADD ./common/install_rocm_drm.sh install_rocm_drm.sh | ||||
| RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh | ||||
| # ROCm 6.4 rocm-smi depends on system drm.h header | ||||
| RUN yum install -y libdrm-devel | ||||
| ENV MKLROOT /opt/intel | ||||
| ADD ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
|  | ||||
| @ -174,6 +169,6 @@ ENV XPU_DRIVER_TYPE ROLLING | ||||
| RUN python3 -m pip install --upgrade pip && \ | ||||
|     python3 -mpip install cmake==3.28.4 | ||||
| ADD ./common/install_xpu.sh install_xpu.sh | ||||
| ENV XPU_VERSION 2025.1 | ||||
| ENV XPU_VERSION 2025.0 | ||||
| RUN bash ./install_xpu.sh && rm install_xpu.sh | ||||
| RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| FROM quay.io/pypa/manylinux_2_28_aarch64 as base | ||||
|  | ||||
| ARG GCCTOOLSET_VERSION=13 | ||||
| # Graviton needs GCC 10 or above for the build. GCC12 is the default version in almalinux-8. | ||||
| ARG GCCTOOLSET_VERSION=11 | ||||
|  | ||||
| # Language variables | ||||
| ENV LC_ALL=en_US.UTF-8 | ||||
| @ -35,10 +36,7 @@ RUN yum install -y \ | ||||
|   yasm \ | ||||
|   zstd \ | ||||
|   sudo \ | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-gcc \ | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-gcc-c++ \ | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-gcc-gfortran \ | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-gdb | ||||
|   gcc-toolset-${GCCTOOLSET_VERSION}-toolchain | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
|  | ||||
							
								
								
									
										94
									
								
								.ci/docker/manywheel/Dockerfile_aarch64
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										94
									
								
								.ci/docker/manywheel/Dockerfile_aarch64
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,94 @@ | ||||
| FROM quay.io/pypa/manylinux2014_aarch64 as base | ||||
|  | ||||
|  | ||||
| # Graviton needs GCC 10 for the build | ||||
| ARG DEVTOOLSET_VERSION=10 | ||||
|  | ||||
| # Language variables | ||||
| ENV LC_ALL=en_US.UTF-8 | ||||
| ENV LANG=en_US.UTF-8 | ||||
| ENV LANGUAGE=en_US.UTF-8 | ||||
|  | ||||
| # Install needed OS packages. This is to support all | ||||
| # the binary builds (torch, vision, audio, text, data) | ||||
| RUN yum -y install epel-release | ||||
| RUN yum -y update | ||||
| RUN yum install -y \ | ||||
|   autoconf \ | ||||
|   automake \ | ||||
|   bison \ | ||||
|   bzip2 \ | ||||
|   curl \ | ||||
|   diffutils \ | ||||
|   file \ | ||||
|   git \ | ||||
|   make \ | ||||
|   patch \ | ||||
|   perl \ | ||||
|   unzip \ | ||||
|   util-linux \ | ||||
|   wget \ | ||||
|   which \ | ||||
|   xz \ | ||||
|   yasm \ | ||||
|   less \ | ||||
|   zstd \ | ||||
|   libgomp \ | ||||
|   sudo \ | ||||
|   devtoolset-${DEVTOOLSET_VERSION}-gcc \ | ||||
|   devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ \ | ||||
|   devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ | ||||
|   devtoolset-${DEVTOOLSET_VERSION}-binutils | ||||
|  | ||||
| # Ensure the expected devtoolset is used | ||||
| ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
|  | ||||
|  | ||||
| # git236+ would refuse to run git commands in repos owned by other users | ||||
| # Which causes version check to fail, as pytorch repo is bind-mounted into the image | ||||
| # Override this behaviour by treating every folder as safe | ||||
| # For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327 | ||||
| RUN git config --global --add safe.directory "*" | ||||
|  | ||||
|  | ||||
| ############################################################################### | ||||
| # libgfortran.a hack | ||||
| # | ||||
| # libgfortran.a from quay.io/pypa/manylinux2014_aarch64 is not compiled with -fPIC. | ||||
| # This causes __stack_chk_guard@@GLIBC_2.17 on pytorch build. To solve, get | ||||
| # ubuntu's libgfortran.a which is compiled with -fPIC | ||||
| # NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vendor, or changed | ||||
| ############################################################################### | ||||
| RUN cd ~/ \ | ||||
|   && curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-4ubuntu2_arm64.deb \ | ||||
|   && ar x ~/libgfortran-10-dev.deb \ | ||||
|   && tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \ | ||||
|   && cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/ | ||||
|  | ||||
| # install cmake | ||||
| RUN yum install -y cmake3 && \ | ||||
|     ln -s /usr/bin/cmake3 /usr/bin/cmake | ||||
|  | ||||
| FROM base as openssl | ||||
| # Install openssl (this must precede `build python` step) | ||||
| # (In order to have a proper SSL module, Python is compiled | ||||
| # against a recent openssl [see env vars above], which is linked | ||||
| # statically. We delete openssl afterwards.) | ||||
| ADD ./common/install_openssl.sh install_openssl.sh | ||||
| RUN bash ./install_openssl.sh && rm install_openssl.sh | ||||
| ENV SSL_CERT_FILE=/opt/_internal/certs.pem | ||||
|  | ||||
| FROM base as openblas | ||||
| # Install openblas | ||||
| ADD ./common/install_openblas.sh install_openblas.sh | ||||
| RUN bash ./install_openblas.sh && rm install_openblas.sh | ||||
|  | ||||
| FROM openssl as final | ||||
| # remove unnecessary python versions | ||||
| RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2 | ||||
| RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4 | ||||
| RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6 | ||||
| RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
| COPY --from=openblas     /opt/OpenBLAS/  /opt/OpenBLAS/ | ||||
| ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH | ||||
| @ -1,7 +1,7 @@ | ||||
| FROM quay.io/pypa/manylinux_2_28_aarch64 as base | ||||
|  | ||||
| # Cuda ARM build needs gcc 11 | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| ARG DEVTOOLSET_VERSION=11 | ||||
|  | ||||
| # Language variables | ||||
| ENV LC_ALL=en_US.UTF-8 | ||||
| @ -34,10 +34,7 @@ RUN yum install -y \ | ||||
|   zstd \ | ||||
|   libgomp \ | ||||
|   sudo \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \ | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-gdb | ||||
|   gcc-toolset-${DEVTOOLSET_VERSION}-toolchain | ||||
|  | ||||
| # Ensure the expected devtoolset is used | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| @ -69,11 +66,8 @@ RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6 | ||||
| FROM base as cuda | ||||
| ARG BASE_CUDA_VERSION | ||||
| # Install CUDA | ||||
| ADD ./common/install_cuda.sh install_cuda.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh | ||||
| ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh | ||||
| RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh | ||||
|  | ||||
| FROM base as magma | ||||
| ARG BASE_CUDA_VERSION | ||||
|  | ||||
| @ -5,9 +5,7 @@ ENV LC_ALL=C.UTF-8 | ||||
| ENV LANG=C.UTF-8 | ||||
| ENV LANGUAGE=C.UTF-8 | ||||
|  | ||||
| # there is a bugfix in gcc >= 14 for precompiled headers and s390x vectorization interaction. | ||||
| # with earlier gcc versions test/inductor/test_cpu_cpp_wrapper.py will fail. | ||||
| ARG DEVTOOLSET_VERSION=14 | ||||
| ARG DEVTOOLSET_VERSION=13 | ||||
| # Install needed OS packages. This is to support all | ||||
| # the binary builds (torch, vision, audio, text, data) | ||||
| RUN yum -y install epel-release | ||||
| @ -44,7 +42,6 @@ RUN yum install -y \ | ||||
|   llvm-devel \ | ||||
|   libzstd-devel \ | ||||
|   python3.12-devel \ | ||||
|   python3.12-test \ | ||||
|   python3.12-setuptools \ | ||||
|   python3.12-pip \ | ||||
|   python3-virtualenv \ | ||||
| @ -60,8 +57,7 @@ RUN yum install -y \ | ||||
|   libxslt-devel \ | ||||
|   libxml2-devel \ | ||||
|   openssl-devel \ | ||||
|   valgrind \ | ||||
|   ninja-build | ||||
|   valgrind | ||||
|  | ||||
| ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH | ||||
| ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH | ||||
| @ -105,33 +101,24 @@ CMD ["/bin/bash"] | ||||
|  | ||||
| # install test dependencies: | ||||
| # - grpcio requires system openssl, bundled crypto fails to build | ||||
| # - ml_dtypes 0.4.0 requires some fixes provided in later commits to build | ||||
| RUN dnf install -y \ | ||||
|   hdf5-devel \ | ||||
|   python3-h5py \ | ||||
|   git | ||||
|   protobuf-devel \ | ||||
|   protobuf-c-devel \ | ||||
|   protobuf-lite-devel \ | ||||
|   wget \ | ||||
|   patch | ||||
|  | ||||
| RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio | ||||
|  | ||||
| # cmake-3.28.0 from pip for onnxruntime | ||||
| RUN python3 -mpip install cmake==3.28.0 | ||||
|  | ||||
| # build onnxruntime 1.21.0 from sources. | ||||
| # it is not possible to build it from sources using pip, | ||||
| # so just build it from upstream repository. | ||||
| # h5py is dependency of onnxruntime_training. | ||||
| # h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository. | ||||
| # install newest flatbuffers version first: | ||||
| # for some reason old version is getting pulled in otherwise. | ||||
| # packaging package is required for onnxruntime wheel build. | ||||
| RUN pip3 install flatbuffers && \ | ||||
|   pip3 install h5py==3.11.0 && \ | ||||
|   pip3 install packaging && \ | ||||
|   git clone https://github.com/microsoft/onnxruntime && \ | ||||
|   cd onnxruntime && git checkout v1.21.0 && \ | ||||
| RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4 | ||||
| RUN cd ~ && \ | ||||
|   git clone https://github.com/jax-ml/ml_dtypes && \ | ||||
|   cd ml_dtypes && \ | ||||
|   git checkout v0.4.0 && \ | ||||
|   git submodule update --init --recursive && \ | ||||
|   ./build.sh --config Release --parallel 0 --enable_pybind \ | ||||
|   --build_wheel --enable_training --enable_training_apis \ | ||||
|   --enable_training_ops --skip_tests --allow_running_as_root \ | ||||
|   --compile_no_warning_as_error && \ | ||||
|   pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \ | ||||
|   cd .. && /bin/rm -rf ./onnxruntime | ||||
|   wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \ | ||||
|   wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \ | ||||
|   patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \ | ||||
|   patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \ | ||||
|   python3 setup.py bdist_wheel && \ | ||||
|   pip3 install dist/*.whl && \ | ||||
|   rm -rf ml_dtypes | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| #!/usr/bin/env bash | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| set -exou pipefail | ||||
| set -eou pipefail | ||||
|  | ||||
| TOPDIR=$(git rev-parse --show-toplevel) | ||||
|  | ||||
| @ -9,108 +9,152 @@ image="$1" | ||||
| shift | ||||
|  | ||||
| if [ -z "${image}" ]; then | ||||
|   echo "Usage: $0 IMAGE:ARCHTAG" | ||||
|   echo "Usage: $0 IMAGE" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # Go from imagename:tag to tag | ||||
| DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}') | ||||
| DOCKER_IMAGE="pytorch/${image}" | ||||
|  | ||||
| GPU_ARCH_VERSION="" | ||||
| if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then | ||||
|     # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8 | ||||
|     GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}') | ||||
| elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then | ||||
|     # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4 | ||||
|     GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}') | ||||
| fi | ||||
| DOCKER_REGISTRY="${DOCKER_REGISTRY:-docker.io}" | ||||
|  | ||||
| GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu} | ||||
| GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-} | ||||
| MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-} | ||||
| DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-} | ||||
| WITH_PUSH=${WITH_PUSH:-} | ||||
|  | ||||
| case ${image} in | ||||
|     manylinux2_28-builder:cpu) | ||||
| case ${GPU_ARCH_TYPE} in | ||||
|     cpu) | ||||
|         TARGET=cpu_final | ||||
|         DOCKER_TAG=cpu | ||||
|         GPU_IMAGE=centos:7 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" | ||||
|         ;; | ||||
|     cpu-manylinux_2_28) | ||||
|         TARGET=cpu_final | ||||
|         DOCKER_TAG=cpu | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     manylinux2_28_aarch64-builder:cpu-aarch64) | ||||
|     cpu-aarch64) | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-aarch64 | ||||
|         GPU_IMAGE=arm64v8/centos:7 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10" | ||||
|         MANY_LINUX_VERSION="aarch64" | ||||
|         ;; | ||||
|     cpu-aarch64-2_28) | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-aarch64 | ||||
|         GPU_IMAGE=arm64v8/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1" | ||||
|         MANY_LINUX_VERSION="2_28_aarch64" | ||||
|         ;; | ||||
|     manylinuxcxx11-abi-builder:cpu-cxx11-abi) | ||||
|     cpu-cxx11-abi) | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-cxx11-abi | ||||
|         GPU_IMAGE="" | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9" | ||||
|         MANY_LINUX_VERSION="cxx11-abi" | ||||
|         ;; | ||||
|     manylinuxs390x-builder:cpu-s390x) | ||||
|     cpu-s390x) | ||||
|         TARGET=final | ||||
|         DOCKER_TAG=cpu-s390x | ||||
|         GPU_IMAGE=s390x/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG="" | ||||
|         MANY_LINUX_VERSION="s390x" | ||||
|         ;; | ||||
|     manylinux2_28-builder:cuda11*) | ||||
|     cuda) | ||||
|         TARGET=cuda_final | ||||
|         DOCKER_TAG=cuda${GPU_ARCH_VERSION} | ||||
|         # Keep this up to date with the minimum version of CUDA we currently support | ||||
|         GPU_IMAGE=centos:7 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9" | ||||
|         ;; | ||||
|     cuda-manylinux_2_28) | ||||
|         TARGET=cuda_final | ||||
|         DOCKER_TAG=cuda${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     manylinux2_28-builder:cuda12*) | ||||
|     cuda-aarch64) | ||||
|         TARGET=cuda_final | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     manylinuxaarch64-builder:cuda*) | ||||
|         TARGET=cuda_final | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13" | ||||
|         DOCKER_TAG=cuda${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=arm64v8/centos:7 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11" | ||||
|         MANY_LINUX_VERSION="aarch64" | ||||
|         DOCKERFILE_SUFFIX="_cuda_aarch64" | ||||
|         ;; | ||||
|     manylinux2_28-builder:rocm*) | ||||
|     rocm|rocm-manylinux_2_28) | ||||
|         TARGET=rocm_final | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         DEVTOOLSET_VERSION="11" | ||||
|         GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         DOCKER_TAG=rocm${GPU_ARCH_VERSION} | ||||
|         GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete | ||||
|         DEVTOOLSET_VERSION="9" | ||||
|         if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then | ||||
|             MANY_LINUX_VERSION="2_28" | ||||
|             DEVTOOLSET_VERSION="11" | ||||
|             GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete | ||||
|         fi | ||||
|         PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" | ||||
|         ;; | ||||
|     manylinux2_28-builder:xpu) | ||||
|     xpu) | ||||
|         TARGET=xpu_final | ||||
|         DOCKER_TAG=xpu | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "ERROR: Unrecognized image name: ${image}" | ||||
|         echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}" | ||||
|         exit 1 | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| IMAGES='' | ||||
|  | ||||
| if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then | ||||
|     DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION} | ||||
| fi | ||||
| # Only activate this if in CI | ||||
| if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then | ||||
|     # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|     # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|     sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
|     sudo systemctl daemon-reload | ||||
|     sudo systemctl restart docker | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|     # Only activate this if in CI | ||||
|     if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then | ||||
|         # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712 | ||||
|         # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023. | ||||
|         sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service | ||||
|         sudo systemctl daemon-reload | ||||
|         sudo systemctl restart docker | ||||
|     fi | ||||
|  | ||||
|     DOCKER_BUILDKIT=1 docker build  \ | ||||
|         ${DOCKER_GPU_BUILD_ARG} \ | ||||
|         --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|         --target "${TARGET}" \ | ||||
|         -t "${DOCKER_IMAGE}" \ | ||||
|         $@ \ | ||||
|         -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \ | ||||
|         "${TOPDIR}/.ci/docker/" | ||||
| ) | ||||
|  | ||||
| GITHUB_REF=${GITHUB_REF:-"dev")} | ||||
| GIT_BRANCH_NAME=${GITHUB_REF##*/} | ||||
| GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)} | ||||
| DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME} | ||||
| DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA} | ||||
|  | ||||
| if [[ "${WITH_PUSH}" == true ]]; then | ||||
|     ( | ||||
|         set -x | ||||
|         docker push "${DOCKER_IMAGE}" | ||||
|         if [[ -n ${GITHUB_REF} ]]; then | ||||
|             docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG} | ||||
|             docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG} | ||||
|             docker push "${DOCKER_IMAGE_BRANCH_TAG}" | ||||
|             docker push "${DOCKER_IMAGE_SHA_TAG}" | ||||
|         fi | ||||
|     ) | ||||
| fi | ||||
|  | ||||
| tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]') | ||||
|  | ||||
| DOCKER_BUILDKIT=1 docker build  \ | ||||
|     ${DOCKER_GPU_BUILD_ARG} \ | ||||
|     --build-arg "GPU_IMAGE=${GPU_IMAGE}" \ | ||||
|     --target "${TARGET}" \ | ||||
|     -t "${tmp_tag}" \ | ||||
|     $@ \ | ||||
|     -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \ | ||||
|     "${TOPDIR}/.ci/docker/" | ||||
|  | ||||
| @ -97,7 +97,7 @@ find /opt/_internal -type f -print0 \ | ||||
|     | xargs -0 -n1 strip --strip-unneeded 2>/dev/null || true | ||||
| # We do not need the Python test suites, or indeed the precompiled .pyc and | ||||
| # .pyo files. Partially cribbed from: | ||||
| #    https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile  # @lint-ignore | ||||
| #    https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile | ||||
| find /opt/_internal \ | ||||
|      \( -type d -a -name test -o -name tests \) \ | ||||
|   -o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) \ | ||||
|  | ||||
| @ -2,7 +2,7 @@ | ||||
| # Helper utilities for build | ||||
| # Script used only in CD pipeline | ||||
|  | ||||
| OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/  # @lint-ignore | ||||
| OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/ | ||||
| CURL_DOWNLOAD_URL=https://curl.se/download | ||||
|  | ||||
| AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf | ||||
|  | ||||
| @ -41,14 +41,11 @@ fbscribelogger==0.1.7 | ||||
| #Pinned versions: 0.1.6 | ||||
| #test that import: | ||||
|  | ||||
| flatbuffers==2.0 ; platform_machine != "s390x" | ||||
| flatbuffers==2.0 | ||||
| #Description: cross platform serialization library | ||||
| #Pinned versions: 2.0 | ||||
| #test that import: | ||||
|  | ||||
| flatbuffers ; platform_machine == "s390x" | ||||
| #Description: cross platform serialization library; Newer version is required on s390x for new python version | ||||
|  | ||||
| hypothesis==5.35.1 | ||||
| # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136 | ||||
| #Description: advanced library for generating parametrized tests | ||||
| @ -93,7 +90,7 @@ librosa>=0.6.2 ; python_version < "3.11" | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| mypy==1.15.0 | ||||
| mypy==1.14.0 | ||||
| # Pin MyPy version because new errors are likely to appear with each release | ||||
| #Description: linter | ||||
| #Pinned versions: 1.14.0 | ||||
| @ -105,10 +102,10 @@ networkx==2.8.8 | ||||
| #Pinned versions: 2.8.8 | ||||
| #test that import: functorch | ||||
|  | ||||
| ninja==1.11.1.3 | ||||
| #Description: build system. Used in some tests. Used in build to generate build | ||||
| #time tracing information | ||||
| #Pinned versions: 1.11.1.3 | ||||
| #ninja | ||||
| #Description: build system.  Note that installing it from | ||||
| #here breaks things, so it is commented out | ||||
| #Pinned versions: 1.10.0.post1 | ||||
| #test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py | ||||
|  | ||||
| numba==0.49.0 ; python_version < "3.9" | ||||
| @ -166,10 +163,10 @@ pillow==11.0.0 | ||||
| #Pinned versions: 10.3.0 | ||||
| #test that import: | ||||
|  | ||||
| protobuf==5.29.4 | ||||
| #Description:  Google's data interchange format | ||||
| #Pinned versions: 5.29.4 | ||||
| #test that import: test_tensorboard.py, test/onnx/* | ||||
| protobuf==3.20.2 | ||||
| #Description:  Google’s data interchange format | ||||
| #Pinned versions: 3.20.1 | ||||
| #test that import: test_tensorboard.py | ||||
|  | ||||
| psutil | ||||
| #Description: information on running processes and system utilization | ||||
| @ -337,12 +334,12 @@ sympy==1.13.3 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| onnx==1.18.0 | ||||
| #Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| onnx==1.17.0 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| onnxscript==0.2.6 | ||||
| onnxscript==0.2.2 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -356,7 +353,7 @@ parameterized==0.8.1 | ||||
| #Pinned versions: 1.24.0 | ||||
| #test that import: test_sac_estimator.py | ||||
|  | ||||
| pwlf==2.2.1 | ||||
| pwlf==2.2.1 ; python_version >= "3.8" | ||||
| #Description: required for testing torch/distributed/_tools/sac_estimator.py | ||||
| #Pinned versions: 2.2.1 | ||||
| #test that import: test_sac_estimator.py | ||||
| @ -368,9 +365,10 @@ PyYAML | ||||
| pyzstd | ||||
| setuptools | ||||
|  | ||||
| ninja==1.11.1 ; platform_machine == "aarch64" | ||||
| scons==4.5.2 ; platform_machine == "aarch64" | ||||
|  | ||||
| pulp==2.9.0 | ||||
| pulp==2.9.0 ; python_version >= "3.8" | ||||
| #Description: required for testing ilp formulation under torch/distributed/_tools | ||||
| #Pinned versions: 2.9.0 | ||||
| #test that import: test_sac_ilp.py | ||||
| @ -379,6 +377,3 @@ dataclasses_json==0.6.7 | ||||
| #Description: required for data pipeline and scripts under tools/stats | ||||
| #Pinned versions: 0.6.7 | ||||
| #test that import: | ||||
|  | ||||
| cmake==4.0.0 | ||||
| #Description: required for building | ||||
|  | ||||
| @ -1,24 +1,15 @@ | ||||
| sphinx==5.3.0 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 5.3.0 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme | ||||
|  | ||||
| # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering | ||||
| # but it doesn't seem to work and hangs around idly. The initial thought is probably | ||||
| # something related to Docker setup. We can investigate this later | ||||
|  | ||||
| sphinxcontrib.katex==0.8.6 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 0.8.6 | ||||
|  | ||||
| sphinxext-opengraph==0.9.1 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 0.9.1 | ||||
|  | ||||
| sphinx_sitemap==2.6.0 | ||||
| #Description: This is used to generate sitemap for PyTorch docs | ||||
| #Pinned versions: 2.6.0 | ||||
|  | ||||
| matplotlib==3.5.3 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 3.5.3 | ||||
| @ -55,6 +46,5 @@ myst-nb==0.17.2 | ||||
| # The following are required to build torch.distributed.elastic.rendezvous.etcd* docs | ||||
| python-etcd==0.4.5 | ||||
| sphinx-copybutton==0.5.0 | ||||
| sphinx-design==0.4.0 | ||||
| sphinxcontrib-mermaid==1.0.0 | ||||
| sphinx-panels==0.4.1 | ||||
| myst-parser==0.18.1 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| 3.3.1 | ||||
| 3.3.0 | ||||
|  | ||||
| @ -2,7 +2,7 @@ ARG UBUNTU_VERSION | ||||
| ARG CUDA_VERSION | ||||
| ARG IMAGE_NAME | ||||
|  | ||||
| FROM ${IMAGE_NAME} as base | ||||
| FROM ${IMAGE_NAME} | ||||
|  | ||||
| ARG UBUNTU_VERSION | ||||
| ARG CUDA_VERSION | ||||
| @ -26,6 +26,7 @@ RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| ARG CONDA_CMAKE | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| @ -42,6 +43,20 @@ ARG CLANG_VERSION | ||||
| COPY ./common/install_clang.sh install_clang.sh | ||||
| RUN bash ./install_clang.sh && rm install_clang.sh | ||||
|  | ||||
| # (optional) Install protobuf for ONNX | ||||
| ARG PROTOBUF | ||||
| COPY ./common/install_protobuf.sh install_protobuf.sh | ||||
| RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi | ||||
| RUN rm install_protobuf.sh | ||||
| ENV INSTALLED_PROTOBUF ${PROTOBUF} | ||||
|  | ||||
| # (optional) Install database packages like LMDB and LevelDB | ||||
| ARG DB | ||||
| COPY ./common/install_db.sh install_db.sh | ||||
| RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi | ||||
| RUN rm install_db.sh | ||||
| ENV INSTALLED_DB ${DB} | ||||
|  | ||||
| # (optional) Install vision packages like OpenCV | ||||
| ARG VISION | ||||
| COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| @ -75,21 +90,21 @@ COPY ci_commit_pins/timm.txt timm.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt | ||||
|  | ||||
| ARG TRITON | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi | ||||
| RUN rm install_cmake.sh | ||||
|  | ||||
| FROM base as triton-builder | ||||
| ARG TRITON | ||||
| # Install triton, this needs to be done before sccache because the latter will | ||||
| # try to reach out to S3, which docker build runners don't have access | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton.txt triton.txt | ||||
| COPY triton_version.txt triton_version.txt | ||||
| RUN bash ./install_triton.sh | ||||
|  | ||||
| FROM base as final | ||||
| COPY --from=triton-builder /opt/triton /opt/triton | ||||
| RUN if [ -n "${TRITON}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi | ||||
| RUN rm -rf /opt/triton | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt | ||||
|  | ||||
| ARG HALIDE | ||||
| # Build and install halide | ||||
| @ -144,16 +159,6 @@ COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| RUN bash install_cusparselt.sh | ||||
| RUN rm install_cusparselt.sh | ||||
|  | ||||
| # Install NCCL | ||||
| ARG CUDA_VERSION | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| RUN bash install_nccl.sh | ||||
| RUN rm install_nccl.sh /ci_commit_pins/nccl-cu* | ||||
| ENV USE_SYSTEM_NCCL=1 | ||||
| ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/" | ||||
| ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/" | ||||
|  | ||||
| # Install CUDSS | ||||
| ARG CUDA_VERSION | ||||
| COPY ./common/install_cudss.sh install_cudss.sh | ||||
|  | ||||
| @ -27,6 +27,7 @@ RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| ARG CONDA_CMAKE | ||||
| COPY requirements-ci.txt /opt/conda/requirements-ci.txt | ||||
| COPY ./common/install_conda.sh install_conda.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| @ -42,6 +43,20 @@ ARG CLANG_VERSION | ||||
| COPY ./common/install_clang.sh install_clang.sh | ||||
| RUN bash ./install_clang.sh && rm install_clang.sh | ||||
|  | ||||
| # (optional) Install protobuf for ONNX | ||||
| ARG PROTOBUF | ||||
| COPY ./common/install_protobuf.sh install_protobuf.sh | ||||
| RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi | ||||
| RUN rm install_protobuf.sh | ||||
| ENV INSTALLED_PROTOBUF ${PROTOBUF} | ||||
|  | ||||
| # (optional) Install database packages like LMDB and LevelDB | ||||
| ARG DB | ||||
| COPY ./common/install_db.sh install_db.sh | ||||
| RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi | ||||
| RUN rm install_db.sh | ||||
| ENV INSTALLED_DB ${DB} | ||||
|  | ||||
| # (optional) Install vision packages like OpenCV | ||||
| ARG VISION | ||||
| COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| @ -55,7 +70,7 @@ COPY ./common/install_rocm.sh install_rocm.sh | ||||
| RUN bash ./install_rocm.sh | ||||
| RUN rm install_rocm.sh | ||||
| COPY ./common/install_rocm_magma.sh install_rocm_magma.sh | ||||
| RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} | ||||
| RUN bash ./install_rocm_magma.sh | ||||
| RUN rm install_rocm_magma.sh | ||||
| ADD ./common/install_miopen.sh install_miopen.sh | ||||
| RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh | ||||
| @ -100,6 +115,12 @@ COPY ci_commit_pins/timm.txt timm.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt | ||||
|  | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi | ||||
| RUN rm install_cmake.sh | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
| COPY ./common/install_ninja.sh install_ninja.sh | ||||
|  | ||||
| @ -28,6 +28,7 @@ RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ARG DOCS | ||||
| ARG BUILD_ENVIRONMENT | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| @ -76,6 +77,13 @@ COPY triton_version.txt triton_version.txt | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt | ||||
|  | ||||
| # (optional) Install database packages like LMDB and LevelDB | ||||
| ARG DB | ||||
| COPY ./common/install_db.sh install_db.sh | ||||
| RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi | ||||
| RUN rm install_db.sh | ||||
| ENV INSTALLED_DB ${DB} | ||||
|  | ||||
| # (optional) Install vision packages like OpenCV | ||||
| ARG VISION | ||||
| COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| @ -83,6 +91,12 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi | ||||
| RUN rm install_vision.sh cache_vision_models.sh common_utils.sh | ||||
| ENV INSTALLED_VISION ${VISION} | ||||
|  | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi | ||||
| RUN rm install_cmake.sh | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
| COPY ./common/install_ninja.sh install_ninja.sh | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| ARG UBUNTU_VERSION | ||||
|  | ||||
| FROM ubuntu:${UBUNTU_VERSION} as base | ||||
| FROM ubuntu:${UBUNTU_VERSION} | ||||
|  | ||||
| ARG UBUNTU_VERSION | ||||
|  | ||||
| @ -28,6 +28,7 @@ RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh | ||||
|  | ||||
| # Install conda and other packages (e.g., numpy, pytest) | ||||
| ARG ANACONDA_PYTHON_VERSION | ||||
| ARG CONDA_CMAKE | ||||
| ARG DOCS | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH | ||||
| @ -51,17 +52,9 @@ RUN  bash ./install_lcov.sh && rm install_lcov.sh | ||||
| # Install cuda and cudnn | ||||
| ARG CUDA_VERSION | ||||
| COPY ./common/install_cuda.sh install_cuda.sh | ||||
| COPY ./common/install_nccl.sh install_nccl.sh | ||||
| COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/ | ||||
| COPY ./common/install_cusparselt.sh install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh | ||||
| RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh | ||||
| ENV DESIRED_CUDA ${CUDA_VERSION} | ||||
| ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH | ||||
| # No effect if cuda not installed | ||||
| ENV USE_SYSTEM_NCCL=1 | ||||
| ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/" | ||||
| ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/" | ||||
|  | ||||
|  | ||||
| # (optional) Install UCC | ||||
| ARG UCX_COMMIT | ||||
| @ -74,6 +67,20 @@ ADD ./common/install_ucc.sh install_ucc.sh | ||||
| RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi | ||||
| RUN rm install_ucc.sh | ||||
|  | ||||
| # (optional) Install protobuf for ONNX | ||||
| ARG PROTOBUF | ||||
| COPY ./common/install_protobuf.sh install_protobuf.sh | ||||
| RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi | ||||
| RUN rm install_protobuf.sh | ||||
| ENV INSTALLED_PROTOBUF ${PROTOBUF} | ||||
|  | ||||
| # (optional) Install database packages like LMDB and LevelDB | ||||
| ARG DB | ||||
| COPY ./common/install_db.sh install_db.sh | ||||
| RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi | ||||
| RUN rm install_db.sh | ||||
| ENV INSTALLED_DB ${DB} | ||||
|  | ||||
| # (optional) Install vision packages like OpenCV | ||||
| ARG VISION | ||||
| COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./ | ||||
| @ -81,6 +88,24 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi | ||||
| RUN rm install_vision.sh cache_vision_models.sh common_utils.sh | ||||
| ENV INSTALLED_VISION ${VISION} | ||||
|  | ||||
| # (optional) Install Vulkan SDK | ||||
| ARG VULKAN_SDK_VERSION | ||||
| COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh | ||||
| RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi | ||||
| RUN rm install_vulkan_sdk.sh | ||||
|  | ||||
| # (optional) Install swiftshader | ||||
| ARG SWIFTSHADER | ||||
| COPY ./common/install_swiftshader.sh install_swiftshader.sh | ||||
| RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi | ||||
| RUN rm install_swiftshader.sh | ||||
|  | ||||
| # (optional) Install non-default CMake version | ||||
| ARG CMAKE_VERSION | ||||
| COPY ./common/install_cmake.sh install_cmake.sh | ||||
| RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi | ||||
| RUN rm install_cmake.sh | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
| COPY ./common/install_ninja.sh install_ninja.sh | ||||
| @ -102,21 +127,20 @@ RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_d | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt | ||||
|  | ||||
| ARG TRITON | ||||
| ARG TRITON_CPU | ||||
|  | ||||
| # Create a separate stage for building Triton and Triton-CPU.  install_triton | ||||
| # will check for the presence of env vars | ||||
| FROM base as triton-builder | ||||
| # Install triton, this needs to be done before sccache because the latter will | ||||
| # try to reach out to S3, which docker build runners don't have access | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton.txt triton.txt | ||||
| COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt | ||||
| RUN bash ./install_triton.sh | ||||
| RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton.txt | ||||
|  | ||||
| FROM base as final | ||||
| COPY --from=triton-builder /opt/triton /opt/triton | ||||
| RUN if [ -n "${TRITON}" ] || [ -n "${TRITON_CPU}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi | ||||
| RUN rm -rf /opt/triton | ||||
| ARG TRITON_CPU | ||||
| COPY ./common/install_triton.sh install_triton.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt | ||||
| RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi | ||||
| RUN rm install_triton.sh common_utils.sh triton-cpu.txt | ||||
|  | ||||
| ARG EXECUTORCH | ||||
| # Build and install executorch | ||||
|  | ||||
							
								
								
									
										2
									
								
								.ci/magma-rocm/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.ci/magma-rocm/.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -1,2 +0,0 @@ | ||||
| output/ | ||||
| magma-rocm*/ | ||||
| @ -1,35 +0,0 @@ | ||||
| SHELL=/usr/bin/env bash | ||||
|  | ||||
| DOCKER_CMD ?= docker | ||||
| DESIRED_ROCM ?= 6.4 | ||||
| DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM)) | ||||
| PACKAGE_NAME = magma-rocm | ||||
| # inherit this from underlying docker image, do not pass this env var to docker | ||||
| #PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201 | ||||
|  | ||||
| DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-v $(shell git rev-parse --show-toplevel)/.ci:/builder \ | ||||
| 	-w /builder \ | ||||
| 	-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_ROCM_SHORT} \ | ||||
| 	-e DESIRED_ROCM=${DESIRED_ROCM} \ | ||||
| 	"pytorch/almalinux-builder:rocm${DESIRED_ROCM}" \ | ||||
| 	magma-rocm/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-rocm64 | ||||
| all: magma-rocm63 | ||||
|  | ||||
| .PHONY: | ||||
| clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-rocm64 | ||||
| magma-rocm64: DESIRED_ROCM := 6.4 | ||||
| magma-rocm64: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-rocm63 | ||||
| magma-rocm63: DESIRED_ROCM := 6.3 | ||||
| magma-rocm63: | ||||
| 	$(DOCKER_RUN) | ||||
| @ -1,48 +0,0 @@ | ||||
| # Magma ROCm | ||||
|  | ||||
| This folder contains the scripts and configurations to build libmagma.so, linked for various versions of ROCm. | ||||
|  | ||||
| ## Building | ||||
|  | ||||
| Look in the `Makefile` for available targets to build. To build any target, for example `magma-rocm63`, run | ||||
|  | ||||
| ``` | ||||
| # Using `docker` | ||||
| make magma-rocm63 | ||||
|  | ||||
| # Using `podman` | ||||
| DOCKER_CMD=podman make magma-rocm63 | ||||
| ``` | ||||
|  | ||||
| This spawns a `pytorch/almalinux-builder:rocm<version>` docker image, which has the required `devtoolset` and ROCm versions installed. | ||||
| Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which packages the necessary files | ||||
| into a tarball, with the following structure: | ||||
|  | ||||
| ``` | ||||
| . | ||||
| ├── include       # header files | ||||
| ├── lib           # libmagma.so | ||||
| ├── info | ||||
| │   ├── licenses  # license file | ||||
| │   └── recipe    # build script | ||||
| ``` | ||||
|  | ||||
| More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the ROCm version. | ||||
| Outputted binaries should be in the `output` folder. | ||||
|  | ||||
|  | ||||
| ## Pushing | ||||
|  | ||||
| Packages can be uploaded to an S3 bucket using: | ||||
|  | ||||
| ``` | ||||
| aws s3 cp output/*/magma-rocm*.bz2 <bucket-with-path> | ||||
| ``` | ||||
|  | ||||
| If you do not have upload permissions, please ping @seemethere or @soumith to gain access | ||||
|  | ||||
| ## New versions | ||||
|  | ||||
| New ROCm versions can be added by creating a new make target with the next desired version. For ROCm version N.n, the target should be named `magma-rocmNn`. | ||||
|  | ||||
| Make sure to edit the appropriate environment variables (e.g., DESIRED_ROCM) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct. | ||||
| @ -1,42 +0,0 @@ | ||||
| #!/usr/bin/env bash | ||||
|  | ||||
| set -eou pipefail | ||||
|  | ||||
| # Environment variables | ||||
| # The script expects PACKAGE_NAME to be set; the Makefile also passes DESIRED_ROCM | ||||
| ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" | ||||
|  | ||||
| # Version 2.7.2 + ROCm related updates | ||||
| MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6 | ||||
|  | ||||
| # Folders for the build | ||||
| PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata | ||||
| PACKAGE_DIR=${ROOT_DIR}/magma-rocm/${PACKAGE_NAME} # build workspace | ||||
| PACKAGE_OUTPUT=${ROOT_DIR}/magma-rocm/output # where tarballs are stored | ||||
| PACKAGE_BUILD=${PACKAGE_DIR} # where the content of the tarball is prepared | ||||
| PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe | ||||
| PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses | ||||
| mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE} | ||||
|  | ||||
| # Fetch magma sources and check out the pinned commit | ||||
| pushd ${PACKAGE_DIR} | ||||
| git clone https://bitbucket.org/icl/magma.git | ||||
| pushd magma | ||||
| git checkout ${MAGMA_VERSION} | ||||
| popd | ||||
| popd | ||||
|  | ||||
| # build | ||||
| pushd ${PACKAGE_DIR}/magma | ||||
| # The build.sh script expects to be executed from the sources root folder | ||||
| INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh | ||||
| popd | ||||
|  | ||||
| # Package recipe, license and tarball | ||||
| # Folder and package name are backward compatible for the build workflow | ||||
| cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh | ||||
| cp ${PACKAGE_DIR}/magma/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT | ||||
| pushd ${PACKAGE_BUILD} | ||||
| tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info | ||||
| echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 | ||||
| popd | ||||
| @ -1,38 +0,0 @@ | ||||
| # Magma build scripts need `python` | ||||
| ln -sf /usr/bin/python3 /usr/bin/python | ||||
|  | ||||
| ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') | ||||
| case "$ID" in | ||||
|   almalinux) | ||||
|     yum install -y gcc-gfortran | ||||
|     ;; | ||||
|   *) | ||||
|     echo "No preinstalls to build magma..." | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION} | ||||
|  | ||||
| cp make.inc-examples/make.inc.hip-gcc-mkl make.inc | ||||
| echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc | ||||
| if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then | ||||
|     echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc | ||||
| fi | ||||
| echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc | ||||
| echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc | ||||
| export PATH="${PATH}:/opt/rocm/bin" | ||||
| if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then | ||||
|   amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'` | ||||
| else | ||||
|   amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs` | ||||
| fi | ||||
| for arch in $amdgpu_targets; do | ||||
|   echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc | ||||
| done | ||||
| # hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition | ||||
| sed -i 's/^FOPENMP/#FOPENMP/g' make.inc | ||||
| make -f make.gen.hipMAGMA -j $(nproc) | ||||
| LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}" | ||||
| make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}" | ||||
| cp -R lib ${INSTALL_DIR} | ||||
| cp -R include ${INSTALL_DIR} | ||||
| @ -12,12 +12,13 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \ | ||||
| 	-e DESIRED_CUDA=${DESIRED_CUDA} \ | ||||
| 	-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \ | ||||
| 	"pytorch/almalinux-builder:cuda${DESIRED_CUDA}-main" \ | ||||
| 	"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \ | ||||
| 	magma/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-cuda128 | ||||
| all: magma-cuda126 | ||||
| all: magma-cuda124 | ||||
| all: magma-cuda118 | ||||
|  | ||||
| .PHONY: | ||||
| @ -36,6 +37,11 @@ magma-cuda126: DESIRED_CUDA := 12.6 | ||||
| magma-cuda126: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda124 | ||||
| magma-cuda124: DESIRED_CUDA := 12.4 | ||||
| magma-cuda124: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda118 | ||||
| magma-cuda118: DESIRED_CUDA := 11.8 | ||||
| magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37 | ||||
|  | ||||
| @ -18,10 +18,12 @@ retry () { | ||||
|     $*  || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*) | ||||
| } | ||||
|  | ||||
| PLATFORM="" | ||||
| PLATFORM="manylinux2014_x86_64" | ||||
| # TODO move this into the Docker images | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| if [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then | ||||
|     retry yum install -q -y zip openssl | ||||
| elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     retry yum install -q -y zip openssl | ||||
|     PLATFORM="manylinux_2_28_x86_64" | ||||
| elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then | ||||
| @ -34,9 +36,6 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then | ||||
|  | ||||
|     retry apt-get update | ||||
|     retry apt-get -y install zip openssl | ||||
| else | ||||
|     echo "Unknown OS: '$OS_NAME'" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # We use the package name to test the package by passing this to 'pip install' | ||||
| @ -80,6 +79,8 @@ if [[ -e /opt/openssl ]]; then | ||||
|     export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH | ||||
| fi | ||||
|  | ||||
|  | ||||
|  | ||||
| mkdir -p /tmp/$WHEELHOUSE_DIR | ||||
|  | ||||
| export PATCHELF_BIN=/usr/local/bin/patchelf | ||||
| @ -110,6 +111,12 @@ case ${DESIRED_PYTHON} in | ||||
|     ;; | ||||
| esac | ||||
|  | ||||
| if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     export _GLIBCXX_USE_CXX11_ABI=1 | ||||
| else | ||||
|     export _GLIBCXX_USE_CXX11_ABI=0 | ||||
| fi | ||||
|  | ||||
| if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|     echo "Calling build_amd.py at $(date)" | ||||
|     python tools/amd_build/build_amd.py | ||||
| @ -202,6 +209,12 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then | ||||
|  | ||||
|     mkdir -p /tmp/$LIBTORCH_HOUSE_DIR | ||||
|  | ||||
|     if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|         LIBTORCH_ABI="cxx11-abi-" | ||||
|     else | ||||
|         LIBTORCH_ABI= | ||||
|     fi | ||||
|  | ||||
|     zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch | ||||
|     cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \ | ||||
|        /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip | ||||
| @ -320,8 +333,8 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w | ||||
|             # ROCm workaround for roctracer dlopens | ||||
|             if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|                 patchedpath=$(fname_without_so_number $destpath) | ||||
|             # Keep the so number for XPU dependencies and libgomp.so.1 to avoid twice load | ||||
|             elif [[ "$DESIRED_CUDA" == *"xpu"* || "$filename" == "libgomp.so.1" ]]; then | ||||
|             # Keep the so number for XPU dependencies | ||||
|             elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then | ||||
|                 patchedpath=$destpath | ||||
|             else | ||||
|                 patchedpath=$(fname_with_sha256 $destpath) | ||||
|  | ||||
| @ -36,8 +36,10 @@ if [[ -n "$DESIRED_CUDA" ]]; then | ||||
|     if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then | ||||
|         CUDA_VERSION=${DESIRED_CUDA} | ||||
|     else | ||||
|         # cu126, cu128 etc... | ||||
|         if [[ ${#DESIRED_CUDA} -eq 5 ]]; then | ||||
|         # cu90, cu92, cu100, cu101 | ||||
|         if [[ ${#DESIRED_CUDA} -eq 4 ]]; then | ||||
|             CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}" | ||||
|         elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then | ||||
|             CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}" | ||||
|         fi | ||||
|     fi | ||||
| @ -59,6 +61,10 @@ case ${CUDA_VERSION} in | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     12.4) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
|         ;; | ||||
|     11.8) | ||||
|         TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0" | ||||
|         EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON") | ||||
| @ -85,15 +91,14 @@ fi | ||||
| mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true | ||||
|  | ||||
| OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release) | ||||
| if [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
| elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
| elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib64/libgomp.so.1" | ||||
| elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then | ||||
|     LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1" | ||||
| else | ||||
|     echo "Unknown OS: '$OS_NAME'" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| DEPS_LIST=( | ||||
| @ -103,8 +108,26 @@ DEPS_SONAME=( | ||||
|     "libgomp.so.1" | ||||
| ) | ||||
|  | ||||
| # CUDA 11.8 has to ship the libcusparseLt.so.0 with the binary | ||||
| # since nvidia-cusparselt-cu11 is not available in PYPI | ||||
| if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then | ||||
|         DEPS_SONAME+=( | ||||
|             "libcusparseLt.so.0" | ||||
|         ) | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0" | ||||
|         ) | ||||
| fi | ||||
|  | ||||
| # CUDA_VERSION 12.6, 12.8 | ||||
|  | ||||
| # Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are | ||||
| # not available in PYPI | ||||
| if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then | ||||
|     export USE_CUFILE=0 | ||||
| fi | ||||
|  | ||||
|  | ||||
| # CUDA_VERSION 12.4, 12.6, 12.8 | ||||
| if [[ $CUDA_VERSION == 12* ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Try parallelizing nvcc as well | ||||
| @ -128,8 +151,6 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|             "/usr/local/cuda/lib64/libnvToolsExt.so.1" | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.12" | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so" | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0" | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1" | ||||
|         ) | ||||
|         DEPS_SONAME+=( | ||||
|             "libcudnn_adv.so.9" | ||||
| @ -147,9 +168,17 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|             "libnvToolsExt.so.1" | ||||
|             "libnvrtc.so.12" | ||||
|             "libnvrtc-builtins.so" | ||||
|             "libcufile.so.0" | ||||
|             "libcufile_rdma.so.1" | ||||
|         ) | ||||
|         if [[ $USE_CUFILE == 1 ]]; then | ||||
|             DEPS_LIST+=( | ||||
|                 "/usr/local/cuda/lib64/libcufile.so.0" | ||||
|                 "/usr/local/cuda/lib64/libcufile_rdma.so.1" | ||||
|             ) | ||||
|             DEPS_SONAME+=( | ||||
|                 "libcufile.so.0" | ||||
|                 "libcufile_rdma.so.1" | ||||
|             ) | ||||
|         fi | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|         CUDA_RPATHS=( | ||||
| @ -165,8 +194,12 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|             '$ORIGIN/../../cusparselt/lib' | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|             '$ORIGIN/../../nvidia/cufile/lib' | ||||
|         ) | ||||
|         if [[ $USE_CUFILE == 1 ]]; then | ||||
|             CUDA_RPATHS+=( | ||||
|                 '$ORIGIN/../../nvidia/cufile/lib' | ||||
|             ) | ||||
|         fi | ||||
|         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") | ||||
|         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' | ||||
| @ -181,25 +214,11 @@ if [[ $CUDA_VERSION == 12* ]]; then | ||||
|     fi | ||||
| elif [[ $CUDA_VERSION == "11.8" ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Turn USE_CUFILE off for CUDA 11.8 since nvidia-cufile-cu11 and 1.9.0.20 are | ||||
|     # not available in PYPI | ||||
|     export USE_CUFILE=0 | ||||
|     # Try parallelizing nvcc as well | ||||
|     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
|     # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750 | ||||
|     export BUILD_BUNDLE_PTXAS=1 | ||||
|  | ||||
|     # CUDA 11.8 has to ship the libcusparseLt.so.0 with the binary | ||||
|     # since nvidia-cusparselt-cu11 is not available in PYPI | ||||
|     if [[ $USE_CUSPARSELT == "1" ]]; then | ||||
|         DEPS_SONAME+=( | ||||
|             "libcusparseLt.so.0" | ||||
|         ) | ||||
|         DEPS_LIST+=( | ||||
|             "/usr/local/cuda/lib64/libcusparseLt.so.0" | ||||
|         ) | ||||
|     fi | ||||
|  | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling with cudnn and cublas." | ||||
|         DEPS_LIST+=( | ||||
|  | ||||
| @ -22,7 +22,9 @@ retry () { | ||||
|  | ||||
| # TODO move this into the Docker images | ||||
| OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release` | ||||
| if [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
| if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then | ||||
|     retry yum install -q -y zip openssl | ||||
| elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then | ||||
|     retry yum install -q -y zip openssl | ||||
| elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then | ||||
|     retry dnf install -q -y zip openssl | ||||
| @ -33,9 +35,6 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then | ||||
|     sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list") | ||||
|     retry apt-get update | ||||
|     retry apt-get -y install zip openssl | ||||
| else | ||||
|     echo "Unknown OS: '$OS_NAME'" | ||||
|     exit 1 | ||||
| fi | ||||
|  | ||||
| # Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if | ||||
| @ -96,6 +95,12 @@ python setup.py clean | ||||
| retry pip install -qr requirements.txt | ||||
| retry pip install -q numpy==2.0.1 | ||||
|  | ||||
| if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     export _GLIBCXX_USE_CXX11_ABI=1 | ||||
| else | ||||
|     export _GLIBCXX_USE_CXX11_ABI=0 | ||||
| fi | ||||
|  | ||||
| if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|     echo "Calling build_amd.py at $(date)" | ||||
|     python tools/amd_build/build_amd.py | ||||
| @ -164,6 +169,12 @@ fi | ||||
|  | ||||
| ) | ||||
|  | ||||
| if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     LIBTORCH_ABI="cxx11-abi-" | ||||
| else | ||||
|     LIBTORCH_ABI= | ||||
| fi | ||||
|  | ||||
| ( | ||||
|     set -x | ||||
|  | ||||
|  | ||||
| @ -20,11 +20,7 @@ fi | ||||
| source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/pti/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/umf/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/ccl/latest/env/vars.sh | ||||
| source /opt/intel/oneapi/mpi/latest/env/vars.sh | ||||
| export USE_STATIC_MKL=1 | ||||
| export USE_ONEMKL=1 | ||||
| export USE_XCCL=1 | ||||
|  | ||||
| WHEELHOUSE_DIR="wheelhousexpu" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_housexpu" | ||||
|  | ||||
| @ -10,3 +10,5 @@ example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are | ||||
| built on Jenkins and are used in triggered builds already have this | ||||
| environment variable set in their manifest. Also see | ||||
| `./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`. | ||||
|  | ||||
| Our Jenkins installation is located at https://ci.pytorch.org/jenkins/. | ||||
|  | ||||
| @ -35,7 +35,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" != *clang* ]]; then | ||||
|   if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then | ||||
|     # TODO: there is a linking issue when building with UCC using clang, | ||||
|     # disable it for now; to be fixed later. | ||||
|     # TODO: disable UCC temporarily to enable CUDA 12.1 in CI | ||||
| @ -171,12 +171,6 @@ fi | ||||
| if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/ccl/latest/env/vars.sh | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/mpi/latest/env/vars.sh | ||||
|   # Enable XCCL build | ||||
|   export USE_XCCL=1 | ||||
|   # XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA | ||||
|   export USE_KINETO=0 | ||||
|   export TORCH_XPU_ARCH_LIST=pvc | ||||
| @ -283,8 +277,10 @@ else | ||||
|     # or building non-XLA tests. | ||||
|     if [[ "$BUILD_ENVIRONMENT" != *rocm*  && | ||||
|           "$BUILD_ENVIRONMENT" != *xla* ]]; then | ||||
|       # Install numpy-2.0.2 for builds which are backward compatible with 1.X | ||||
|       python -mpip install numpy==2.0.2 | ||||
|       if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then | ||||
|         # Install numpy-2.0.2 for builds which are backward compatible with 1.X | ||||
|         python -mpip install numpy==2.0.2 | ||||
|       fi | ||||
|  | ||||
|       WERROR=1 python setup.py clean | ||||
|  | ||||
| @ -307,18 +303,6 @@ else | ||||
|     fi | ||||
|     pip_install_whl "$(echo dist/*.whl)" | ||||
|  | ||||
|     if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|       echo "Checking that xpu is compiled" | ||||
|       pushd dist/ | ||||
|       if python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'; then | ||||
|         echo "XPU support is compiled in." | ||||
|       else | ||||
|         echo "XPU support is NOT compiled in." | ||||
|         exit 1 | ||||
|       fi | ||||
|       popd | ||||
|     fi | ||||
|  | ||||
|     # TODO: I'm not sure why, but somehow we lose verbose commands | ||||
|     set -x | ||||
|  | ||||
|  | ||||
| @ -59,16 +59,78 @@ else | ||||
|   export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/" | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Setup XPU ENV | ||||
| ############################################################################### | ||||
| if [[ "$DESIRED_CUDA" == 'xpu' ]]; then | ||||
|   set +u | ||||
|   # Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html | ||||
|   source /opt/intel/oneapi/compiler/latest/env/vars.sh | ||||
|   source /opt/intel/oneapi/pti/latest/env/vars.sh | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check GCC ABI | ||||
| ############################################################################### | ||||
|  | ||||
| # NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce | ||||
| #       wheels with cxx11-abi | ||||
| # NOTE [ Building libtorch with old vs. new gcc ABI ] | ||||
| # | ||||
| # Packages built with one version of ABI could not be linked against by client | ||||
| # C++ libraries that were compiled using the other version of ABI. Since both | ||||
| # gcc ABIs are still common in the wild, we need to support both ABIs. Currently: | ||||
| # | ||||
| # - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI. | ||||
| # - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI. | ||||
|  | ||||
| echo "Checking that the gcc ABI is what we expect" | ||||
| if [[ "$(uname)" != 'Darwin' ]]; then | ||||
|   # We also check that there are cxx11 symbols in libtorch | ||||
|   # Echo 1 when the ABI value passed as $1 matches what this build expects; | ||||
|   # echo nothing otherwise. | ||||
|   #   - DESIRED_DEVTOOLSET containing "cxx11-abi" or DESIRED_CUDA containing | ||||
|   #     "rocm": $1 must be a number greater than 0, or the string "ON ". | ||||
|   #   - any other build: $1 must be empty, 0 or "OFF". | ||||
|   # NOTE(review): the "ON " literal carries a trailing space - confirm that | ||||
|   # this is intended. | ||||
|   function is_expected() { | ||||
|     if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* || "$DESIRED_CUDA" == *"rocm"* ]]; then | ||||
|       if [[ "$1" -gt 0 || "$1" == "ON " ]]; then | ||||
|         echo 1 | ||||
|       fi | ||||
|     else | ||||
|       if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then | ||||
|         echo 1 | ||||
|       fi | ||||
|     fi | ||||
|   } | ||||
|  | ||||
|   # First we check that the env var in TorchConfig.cmake is correct | ||||
|  | ||||
|   # We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake | ||||
|   torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake" | ||||
|   if [[ ! -f "$torch_config" ]]; then | ||||
|     echo "No TorchConfig.cmake found!" | ||||
|     ls -lah "$install_root/share/cmake/Torch" | ||||
|     exit 1 | ||||
|   fi | ||||
|   echo "Checking the TorchConfig.cmake" | ||||
|   cat "$torch_config" | ||||
|  | ||||
|   # The sed call below is | ||||
|   #   don't print lines by default (only print the line we want) | ||||
|   # -n | ||||
|   #   execute the following expression | ||||
|   # e | ||||
|   #   replace lines that match with the first capture group and print | ||||
|   # s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p | ||||
|   #   any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a | ||||
|   #   quote, any characters | ||||
|   #   Note the exactly one single character after the '='. In the case that the | ||||
|   #     variable is not set the '=' will be followed by a '"' immediately and the | ||||
|   #     line will fail the match and nothing will be printed; this is what we | ||||
|   #     want.  Otherwise it will capture the 0 or 1 after the '='. | ||||
|   # /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/ | ||||
|   #   replace the matched line with the capture group and print | ||||
|   # /\1/p | ||||
|   actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")" | ||||
|   if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then | ||||
|     echo "gcc ABI $actual_gcc_abi not as expected." | ||||
|     exit 1 | ||||
|   fi | ||||
|  | ||||
|   # We also check that there are [not] cxx11 symbols in libtorch | ||||
|   # | ||||
|   echo "Checking that symbols in libtorch.so have the right gcc abi" | ||||
|   python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py" | ||||
| @ -146,11 +208,35 @@ setup_link_flags () { | ||||
|  | ||||
| TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code" | ||||
| build_and_run_example_cpp () { | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     GLIBCXX_USE_CXX11_ABI=1 | ||||
|   else | ||||
|     GLIBCXX_USE_CXX11_ABI=0 | ||||
|   fi | ||||
|   setup_link_flags | ||||
|   g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1 | ||||
|   g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1 | ||||
|   ./$1 | ||||
| } | ||||
|  | ||||
| # Negative test: compile ${TEST_CODE_DIR}/$1.cpp against the installed libtorch | ||||
| # with the opposite _GLIBCXX_USE_CXX11_ABI value from the one implied by | ||||
| # DESIRED_DEVTOOLSET, and require that g++ fails. | ||||
| # Arguments: $1 - example name (source file without the .cpp extension). | ||||
| # Exits with status 1 if the build unexpectedly succeeds. | ||||
| build_example_cpp_with_incorrect_abi () { | ||||
|   # Deliberately pick the ABI flag that does NOT match the package | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     GLIBCXX_USE_CXX11_ABI=0 | ||||
|   else | ||||
|     GLIBCXX_USE_CXX11_ABI=1 | ||||
|   fi | ||||
|   # The g++ call below is expected to fail, so turn off errexit around it | ||||
|   set +e | ||||
|   setup_link_flags | ||||
|   g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1 | ||||
|   # Capture the g++ exit status before re-enabling errexit | ||||
|   ERRCODE=$? | ||||
|   set -e | ||||
|   if [ "$ERRCODE" -eq "0" ]; then | ||||
|     echo "Building example with incorrect ABI didn't throw error. Aborting." | ||||
|     exit 1 | ||||
|   else | ||||
|     echo "Building example with incorrect ABI throws expected error. Proceeding." | ||||
|   fi | ||||
| } | ||||
|  | ||||
| ############################################################################### | ||||
| # Check simple Python/C++ calls | ||||
| ############################################################################### | ||||
| @ -160,6 +246,11 @@ if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then | ||||
|     export LD_LIBRARY_PATH=/usr/local/cuda/lib64 | ||||
|   fi | ||||
|   build_and_run_example_cpp simple-torch-test | ||||
|   # `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test | ||||
|   # the expected failure case for Ubuntu 16.04 + gcc 5.4 only. | ||||
|   if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then | ||||
|     build_example_cpp_with_incorrect_abi simple-torch-test | ||||
|   fi | ||||
| else | ||||
|   pushd /tmp | ||||
|   python -c 'import torch' | ||||
| @ -216,14 +307,6 @@ else | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check XPU configured correctly | ||||
| ############################################################################### | ||||
| if [[ "$DESIRED_CUDA" == 'xpu' && "$PACKAGE_TYPE" != 'libtorch' ]]; then | ||||
|   echo "Checking that xpu is compiled" | ||||
|   python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)' | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check CUDA configured correctly | ||||
| ############################################################################### | ||||
| @ -302,22 +385,10 @@ except RuntimeError as e: | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for C++ ABI compatibility to GCC-11 - GCC 13 | ||||
| # Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries | ||||
| ############################################################################### | ||||
| if [[ "$(uname)" == 'Linux' &&  "$PACKAGE_TYPE" == 'manywheel' ]]; then | ||||
|   pushd /tmp | ||||
|   # Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html | ||||
|   # gcc-11 is ABI16, gcc-13 is ABI18, gcc-14 is ABI19 | ||||
|   # gcc 11 - CUDA 11.8, xpu, rocm | ||||
|   # gcc 13 - CUDA 12.6, 12.8 and cpu | ||||
|   # Please see issue for reference: https://github.com/pytorch/pytorch/issues/152426 | ||||
|   if [[ "$(uname -m)" == "s390x" ]]; then | ||||
|     cxx_abi="19" | ||||
|   elif [[ "$DESIRED_CUDA" != 'cu118' && "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'rocm'* ]]; then | ||||
|     cxx_abi="18" | ||||
|   else | ||||
|     cxx_abi="16" | ||||
|   fi | ||||
|   python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi10${cxx_abi}' else 1)" | ||||
|   python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))" | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| @ -13,6 +13,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then | ||||
|   # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors | ||||
|   unset HIP_PLATFORM | ||||
|   export PYTORCH_TEST_WITH_ROCM=1 | ||||
|   # temporary to locate some kernel issues on the CI nodes | ||||
|   export HSAKMT_DEBUG_LEVEL=4 | ||||
|   # improve rccl performance for distributed tests | ||||
|   export HSA_FORCE_FINE_GRAIN_PCIE=1 | ||||
| fi | ||||
|  | ||||
| # TODO: Re-enable libtorch testing for MacOS, see https://github.com/pytorch/pytorch/issues/62598 | ||||
|  | ||||
| @ -202,7 +202,7 @@ function install_torchrec_and_fbgemm() { | ||||
|  | ||||
| function clone_pytorch_xla() { | ||||
|   if [[ ! -d ./xla ]]; then | ||||
|     git clone --recursive --quiet https://github.com/pytorch/xla.git | ||||
|     git clone --recursive -b r2.7 https://github.com/pytorch/xla.git | ||||
|     pushd xla | ||||
|     # pin the xla hash so that we don't get broken by changes to xla | ||||
|     git checkout "$(cat ../.github/ci_commit_pins/xla.txt)" | ||||
|  | ||||
| @ -1,50 +1,31 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # Script for installing sccache on the xla build job, which uses xla's docker | ||||
| # image, which has sccache installed but doesn't write the stubs.  This is | ||||
| # mostly copied from .ci/docker/install_cache.sh.  Changes are: removing checks | ||||
| # that will always return the same thing, ex checks for rocm, CUDA, changing | ||||
| # the path where sccache is installed, not changing /etc/environment, and not | ||||
| # installing/downloading sccache as it is already in the docker image. | ||||
| # image and doesn't have sccache installed on it.  This is mostly copied from | ||||
| # .ci/docker/install_cache.sh.  Changes are: removing checks that will always | ||||
| # return the same thing, ex checks for rocm, CUDA, and changing the path | ||||
| # where sccache is installed, and not changing /etc/environment. | ||||
|  | ||||
| set -ex -o pipefail | ||||
|  | ||||
| install_binary() { | ||||
|   echo "Downloading sccache binary from S3 repo" | ||||
|   curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /tmp/cache/bin/sccache | ||||
| } | ||||
|  | ||||
| mkdir -p /tmp/cache/bin | ||||
| mkdir -p /tmp/cache/lib | ||||
| export PATH="/tmp/cache/bin:$PATH" | ||||
|  | ||||
| install_binary | ||||
| chmod a+x /tmp/cache/bin/sccache | ||||
|  | ||||
| function write_sccache_stub() { | ||||
|   # Unset LD_PRELOAD for ps because of asan + ps issues | ||||
|   # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589 | ||||
|   if [ "$1" == "gcc" ]; then | ||||
|     # Do not call sccache recursively when dumping preprocessor argument | ||||
|     # For some reason it's very important for the first cached nvcc invocation | ||||
|     cat >"/tmp/cache/bin/$1" <<EOF | ||||
| #!/bin/sh | ||||
|  | ||||
| # sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively | ||||
| for arg in "\$@"; do | ||||
|   if [ "\$arg" = "-E" ]; then | ||||
|     exec $(which "$1") "\$@" | ||||
|   fi | ||||
| done | ||||
|  | ||||
| if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then | ||||
|   exec sccache $(which "$1") "\$@" | ||||
| else | ||||
|   exec $(which "$1") "\$@" | ||||
| fi | ||||
| EOF | ||||
|   else | ||||
|     cat >"/tmp/cache/bin/$1" <<EOF | ||||
| #!/bin/sh | ||||
|  | ||||
| if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then | ||||
|   exec sccache $(which "$1") "\$@" | ||||
| else | ||||
|   exec $(which "$1") "\$@" | ||||
| fi | ||||
| EOF | ||||
|   fi | ||||
|   # shellcheck disable=SC2086 | ||||
|   # shellcheck disable=SC2059 | ||||
|   printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/tmp/cache/bin/$1" | ||||
|   chmod a+x "/tmp/cache/bin/$1" | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -33,15 +33,56 @@ if which sccache > /dev/null; then | ||||
|   export PATH="${tmp_dir}:$PATH" | ||||
| fi | ||||
|  | ||||
| print_cmake_info | ||||
| if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then | ||||
|   # Needed for inductor benchmarks, as lots of HF networks make `torch.distributed` calls | ||||
|   USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel | ||||
| else | ||||
| cross_compile_arm64() { | ||||
|   # Cross compilation for arm64 | ||||
|   # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests | ||||
|   # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 | ||||
|   USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel | ||||
| } | ||||
|  | ||||
| compile_arm64() { | ||||
|   # Compilation for arm64 | ||||
|   # TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation were done with OpenMP disabled) | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel | ||||
| } | ||||
|  | ||||
| compile_x86_64() { | ||||
|   USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel --plat-name=macosx_10_9_x86_64 | ||||
| } | ||||
|  | ||||
| # Build libtorch through tools/build_libtorch.py in a scratch directory next to | ||||
| # the current working directory, then run the resulting | ||||
| # test_lite_interpreter_runtime binary. | ||||
| build_lite_interpreter() { | ||||
|     echo "Testing libtorch (lite interpreter)." | ||||
|  | ||||
|     CPP_BUILD="$(pwd)/../cpp_build" | ||||
|     # Ensure the removal of the tmp directory when the shell exits | ||||
|     trap 'rm -rfv ${CPP_BUILD}' EXIT | ||||
|     # Start from a clean build directory | ||||
|     rm -rf "${CPP_BUILD}" | ||||
|     mkdir -p "${CPP_BUILD}/caffe2" | ||||
|  | ||||
|     # It looks like libtorch needs to be built in the "${CPP_BUILD}/caffe2" folder. | ||||
|     # Resolve the script path before changing directory, since it is relative to $PWD | ||||
|     BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py | ||||
|     pushd "${CPP_BUILD}/caffe2" || exit | ||||
|     VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}" | ||||
|     popd || exit | ||||
|  | ||||
|     "${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime" | ||||
| } | ||||
|  | ||||
| print_cmake_info | ||||
|  | ||||
| if [[ ${BUILD_ENVIRONMENT} = *arm64* ]]; then | ||||
|   if [[ $(uname -m) == "arm64" ]]; then | ||||
|     compile_arm64 | ||||
|   else | ||||
|     cross_compile_arm64 | ||||
|   fi | ||||
| elif [[ ${BUILD_ENVIRONMENT} = *lite-interpreter* ]]; then | ||||
|   export BUILD_LITE_INTERPRETER=1 | ||||
|   build_lite_interpreter | ||||
| else | ||||
|   compile_x86_64 | ||||
| fi | ||||
|  | ||||
| if which sccache > /dev/null; then | ||||
|   print_sccache_stats | ||||
| fi | ||||
|  | ||||
| @ -20,4 +20,14 @@ print_cmake_info() { | ||||
|   CONDA_INSTALLATION_DIR=$(dirname "$CMAKE_EXEC") | ||||
|   # Print all libraries under cmake rpath for debugging | ||||
|   ls -la "$CONDA_INSTALLATION_DIR/../lib" | ||||
|  | ||||
|   export CMAKE_EXEC | ||||
|   # Explicitly add conda env lib folder to cmake rpath to address the flaky issue | ||||
|   # where cmake dependencies couldn't be found. This seems to point to how conda | ||||
|   # links $CMAKE_EXEC to its package cache when cloning a new environment | ||||
|   install_name_tool -add_rpath @executable_path/../lib "${CMAKE_EXEC}" || true | ||||
|   # Adding the rpath will invalidate cmake signature, so signing it again here | ||||
|   # to trust the executable. EXC_BAD_ACCESS (SIGKILL (Code Signature Invalid)) | ||||
|   # with an exit code 137 otherwise | ||||
|   codesign -f -s - "${CMAKE_EXEC}" || true | ||||
| } | ||||
|  | ||||
| @ -42,16 +42,6 @@ test_python_all() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| # Run the MPS test suite through test/run_test.py, then run the | ||||
| # test_metal_capture test from test/test_mps.py with Metal capture enabled. | ||||
| test_python_mps() { | ||||
|   setup_test_python | ||||
|  | ||||
|   time python test/run_test.py --verbose --mps | ||||
|   # MTL_CAPTURE_ENABLED=1 is set only for this single invocation | ||||
|   MTL_CAPTURE_ENABLED=1 ${CONDA_RUN} python3 test/test_mps.py --verbose -k test_metal_capture | ||||
|  | ||||
|   # presumably fails when the tests left the git checkout modified - verify against the helper | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
|  | ||||
| test_python_shard() { | ||||
|   if [[ -z "$NUM_TEST_SHARDS" ]]; then | ||||
|     echo "NUM_TEST_SHARDS must be defined to run a Python test shard" | ||||
| @ -165,7 +155,6 @@ test_jit_hooks() { | ||||
| torchbench_setup_macos() { | ||||
|   git clone --recursive https://github.com/pytorch/vision torchvision | ||||
|   git clone --recursive https://github.com/pytorch/audio torchaudio | ||||
|   brew install jpeg-turbo libpng | ||||
|  | ||||
|   pushd torchvision | ||||
|   git fetch | ||||
| @ -180,8 +169,7 @@ torchbench_setup_macos() { | ||||
|   git checkout "$(cat ../.github/ci_commit_pins/audio.txt)" | ||||
|   git submodule update --init --recursive | ||||
|   python setup.py clean | ||||
|   #TODO: Remove me, when figure out how to make TorchAudio find brew installed openmp | ||||
|   USE_OPENMP=0 python setup.py develop | ||||
|   python setup.py develop | ||||
|   popd | ||||
|  | ||||
|   # Shellcheck doesn't like it when you pass no arguments to a function that can take args. See https://www.shellcheck.net/wiki/SC2120 | ||||
| @ -189,8 +177,9 @@ torchbench_setup_macos() { | ||||
|   checkout_install_torchbench | ||||
| } | ||||
|  | ||||
| pip_benchmark_deps() { | ||||
|   python -mpip install --no-input astunparse requests cython scikit-learn | ||||
| conda_benchmark_deps() { | ||||
|   conda install -y astunparse numpy scipy ninja pyyaml setuptools cmake typing-extensions requests protobuf numba cython scikit-learn | ||||
|   conda install -y -c conda-forge librosa | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -198,7 +187,7 @@ test_torchbench_perf() { | ||||
|   print_cmake_info | ||||
|  | ||||
|   echo "Launching torchbench setup" | ||||
|   pip_benchmark_deps | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
| @ -225,62 +214,32 @@ test_torchbench_smoketest() { | ||||
|   print_cmake_info | ||||
|  | ||||
|   echo "Launching torchbench setup" | ||||
|   pip_benchmark_deps | ||||
|   conda_benchmark_deps | ||||
|   # shellcheck disable=SC2119,SC2120 | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   local backend=eager | ||||
|   local dtype=notset | ||||
|   local device=mps | ||||
|   local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor) | ||||
|   local hf_models=(GoogleFnet YituTechConvBert Speech2Text2ForCausalLM) | ||||
|  | ||||
|   for backend in eager inductor; do | ||||
|   touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|   touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|  | ||||
|     for dtype in notset float16 bfloat16; do | ||||
|       echo "Launching torchbench inference performance run for backend ${backend} and dtype ${dtype}" | ||||
|       local dtype_arg="--${dtype}" | ||||
|       if [ "$dtype" == notset ]; then | ||||
|           dtype_arg="--float32" | ||||
|       fi | ||||
|       touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|       for model in "${models[@]}"; do | ||||
|         PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|           --performance --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \ | ||||
|           --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" || true | ||||
|         if [ "$backend" == "inductor" ]; then | ||||
|           PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|             --accuracy --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \ | ||||
|             --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_accuracy.csv" || true | ||||
|         fi | ||||
|       done | ||||
|       for model in "${hf_models[@]}"; do | ||||
|         if [ "$backend" == "inductor" ]; then | ||||
|           PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \ | ||||
|             --performance --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \ | ||||
|             --output "$TEST_REPORTS_DIR/inductor_${backend}_huggingface_${dtype}_inference_${device}_performance.csv" || true | ||||
|           PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \ | ||||
|             --accuracy --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \ | ||||
|             --output "$TEST_REPORTS_DIR/inductor_${backend}_huggingface_${dtype}_inference_${device}_accuracy.csv" || true | ||||
|         fi | ||||
|       done | ||||
|     done | ||||
|  | ||||
|     for dtype in notset amp; do | ||||
|       echo "Launching torchbench training performance run for backend ${backend} and dtype ${dtype}" | ||||
|       touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|       local dtype_arg="--${dtype}" | ||||
|       if [ "$dtype" == notset ]; then | ||||
|           dtype_arg="--float32" | ||||
|       fi | ||||
|       for model in "${models[@]}"; do | ||||
|         PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|           --performance --only "$model" --backend "$backend" --training --devices "$device" "$dtype_arg" \ | ||||
|           --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" || true | ||||
|       done | ||||
|     done | ||||
|   echo "Setup complete, launching torchbench training performance run" | ||||
|   for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --performance --only "$model" --backend "$backend" --training --devices "$device" \ | ||||
|       --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" | ||||
|   done | ||||
|  | ||||
|   echo "Launching torchbench inference performance run" | ||||
|   for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --performance --only "$model" --backend "$backend" --inference --devices "$device" \ | ||||
|       --output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|   done | ||||
|  | ||||
|   echo "Pytorch benchmark on mps device completed" | ||||
| @ -290,7 +249,7 @@ test_hf_perf() { | ||||
|   print_cmake_info | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   pip_benchmark_deps | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   echo "Launching HuggingFace training perf run" | ||||
| @ -306,7 +265,7 @@ test_timm_perf() { | ||||
|   print_cmake_info | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   pip_benchmark_deps | ||||
|   conda_benchmark_deps | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   echo "Launching timm training perf run" | ||||
| @ -332,8 +291,6 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then | ||||
|   test_timm_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then | ||||
|   test_torchbench_smoketest | ||||
| elif [[ $TEST_CONFIG == *"mps"* ]]; then | ||||
|   test_python_mps | ||||
| elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then | ||||
|   test_python_shard "${SHARD_NUMBER}" | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|  | ||||
							
								
								
									
										22
									
								
								.ci/pytorch/perf_test/common.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								.ci/pytorch/perf_test/common.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,22 @@ | ||||
| #!/bin/bash | ||||
| set -e | ||||
|  | ||||
| # Run the given command inside a freshly created scratch directory test_tmp/ | ||||
| # (any existing one is removed first) and delete that directory afterwards. | ||||
| # Arguments: the command to run followed by its arguments. | ||||
| run_test () { | ||||
|   rm -rf test_tmp/ && mkdir test_tmp/ && cd test_tmp/ | ||||
|   "$@" | ||||
|   cd .. && rm -rf test_tmp/ | ||||
| } | ||||
|  | ||||
| # Time a command and echo the measured runtime. | ||||
| # Arguments: the command to time followed by its arguments. | ||||
| # Calls `exit 1` when the captured output contains the text "Error". | ||||
| get_runtime_of_command () { | ||||
|   # %R makes the bash `time` keyword report only the elapsed real time in seconds | ||||
|   TIMEFORMAT=%R | ||||
|  | ||||
|   # runtime=$( { time ($@ &> /dev/null); } 2>&1 1>/dev/null) | ||||
|   # Capture stderr of the group (the command's stderr plus the timing report); | ||||
|   # the command's stdout is discarded. | ||||
|   runtime=$( { time "$@"; } 2>&1 1>/dev/null) | ||||
|   if [[ $runtime == *"Error"* ]]; then | ||||
|     exit 1 | ||||
|   fi | ||||
|   # Drop a leading "+++ <command>" prefix - presumably the xtrace echo of the | ||||
|   # timed command; confirm against how callers enable `set -x`. | ||||
|   runtime=${runtime#+++ $@} | ||||
|   # The remaining text is evaluated by Python as an expression and printed | ||||
|   runtime=$(python -c "print($runtime)") | ||||
|  | ||||
|   echo "$runtime" | ||||
| } | ||||
							
								
								
									
										91
									
								
								.ci/pytorch/perf_test/compare_with_baseline.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								.ci/pytorch/perf_test/compare_with_baseline.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,91 @@ | ||||
| import argparse | ||||
| import json | ||||
| import math | ||||
| import sys | ||||
|  | ||||
|  | ||||
| parser = argparse.ArgumentParser() | ||||
| parser.add_argument( | ||||
|     "--test-name", dest="test_name", action="store", required=True, help="test name" | ||||
| ) | ||||
| parser.add_argument( | ||||
|     "--sample-stats", | ||||
|     dest="sample_stats", | ||||
|     action="store", | ||||
|     required=True, | ||||
|     help="stats from sample", | ||||
| ) | ||||
| parser.add_argument( | ||||
|     "--update", | ||||
|     action="store_true", | ||||
|     help="whether to update baseline using stats from sample", | ||||
| ) | ||||
| args = parser.parse_args() | ||||
|  | ||||
| test_name = args.test_name | ||||
|  | ||||
| if "cpu" in test_name: | ||||
|     backend = "cpu" | ||||
| elif "gpu" in test_name: | ||||
|     backend = "gpu" | ||||
|  | ||||
| data_file_path = f"../{backend}_runtime.json" | ||||
|  | ||||
| with open(data_file_path) as data_file: | ||||
|     data = json.load(data_file) | ||||
|  | ||||
| if test_name in data: | ||||
|     mean = float(data[test_name]["mean"]) | ||||
|     sigma = float(data[test_name]["sigma"]) | ||||
| else: | ||||
|     # Let the test pass if baseline number doesn't exist | ||||
|     mean = sys.maxsize | ||||
|     sigma = 0.001 | ||||
|  | ||||
| print("population mean: ", mean) | ||||
| print("population sigma: ", sigma) | ||||
|  | ||||
| # Let the test pass if baseline number is NaN (which happened in | ||||
| # the past when we didn't have logic for catching NaN numbers) | ||||
| if math.isnan(mean) or math.isnan(sigma): | ||||
|     mean = sys.maxsize | ||||
|     sigma = 0.001 | ||||
|  | ||||
| sample_stats_data = json.loads(args.sample_stats) | ||||
|  | ||||
| sample_mean = float(sample_stats_data["mean"]) | ||||
| sample_sigma = float(sample_stats_data["sigma"]) | ||||
|  | ||||
| print("sample mean: ", sample_mean) | ||||
| print("sample sigma: ", sample_sigma) | ||||
|  | ||||
| if math.isnan(sample_mean): | ||||
|     raise Exception("""Error: sample mean is NaN""")  # noqa: TRY002 | ||||
| elif math.isnan(sample_sigma): | ||||
|     raise Exception("""Error: sample sigma is NaN""")  # noqa: TRY002 | ||||
|  | ||||
| z_value = (sample_mean - mean) / sigma | ||||
|  | ||||
| print("z-value: ", z_value) | ||||
|  | ||||
| if z_value >= 3: | ||||
|     raise Exception(  # noqa: TRY002 | ||||
|         f"""\n | ||||
| z-value >= 3, there is high chance of perf regression.\n | ||||
| To reproduce this regression, run | ||||
| `cd .ci/pytorch/perf_test/ && bash {test_name}.sh` on your local machine | ||||
| and compare the runtime before/after your code change. | ||||
| """ | ||||
|     ) | ||||
| else: | ||||
|     print("z-value < 3, no perf regression detected.") | ||||
|     if args.update: | ||||
|         print("We will use these numbers as new baseline.") | ||||
|         new_data_file_path = f"../new_{backend}_runtime.json" | ||||
|         with open(new_data_file_path) as new_data_file: | ||||
|             new_data = json.load(new_data_file) | ||||
|         new_data[test_name] = {} | ||||
|         new_data[test_name]["mean"] = sample_mean | ||||
|         new_data[test_name]["sigma"] = max(sample_sigma, sample_mean * 0.1) | ||||
|         with open(new_data_file_path, "w") as new_data_file: | ||||
|             json.dump(new_data, new_data_file, indent=4) | ||||
							
								
								
									
										18
									
								
								.ci/pytorch/perf_test/get_stats.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								.ci/pytorch/perf_test/get_stats.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,18 @@ | ||||
| import json | ||||
| import sys | ||||
|  | ||||
| import numpy | ||||
|  | ||||
|  | ||||
| # Every command-line argument is one runtime sample; surrounding whitespace | ||||
| # is stripped before conversion to float. | ||||
| sample_data_list = sys.argv[1:] | ||||
| sample_data_list = [float(v.strip()) for v in sample_data_list] | ||||
|  | ||||
| sample_mean = numpy.mean(sample_data_list) | ||||
| # numpy.std uses ddof=0 by default, i.e. the population standard deviation | ||||
| sample_sigma = numpy.std(sample_data_list) | ||||
|  | ||||
| data = { | ||||
|     "mean": sample_mean, | ||||
|     "sigma": sample_sigma, | ||||
| } | ||||
|  | ||||
| # Emit the statistics as a single JSON object on stdout | ||||
| print(json.dumps(data)) | ||||
| @ -0,0 +1,43 @@ | ||||
| #!/bin/bash | ||||
| set -e | ||||
|  | ||||
| . ./common.sh | ||||
|  | ||||
| # Time the mini sequence labeler script from pytorch/benchmark on CPU. | ||||
| # Arguments: | ||||
| #   $1 - number of timed runs | ||||
| #   $2 - optional: "compare_with_baseline" to check the sample against the | ||||
| #        baseline, or "compare_and_update" to also pass --update | ||||
| test_cpu_speed_mini_sequence_labeler () { | ||||
|   echo "Testing: mini sequence labeler, CPU" | ||||
|  | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   # Check out a fixed commit of the benchmark repository | ||||
|   git checkout 726567a455edbfda6199445922a8cfee82535664 | ||||
|  | ||||
|   cd scripts/mini_sequence_labeler | ||||
|  | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   # Collect one runtime per run | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py) | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Go back up the three directory levels entered above | ||||
|   cd ../../.. | ||||
|  | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # ${FUNCNAME[0]} is this function's name, used as the test name | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_mini_sequence_labeler "$@" | ||||
| fi | ||||
							
								
								
									
										45
									
								
								.ci/pytorch/perf_test/test_cpu_speed_mnist.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								.ci/pytorch/perf_test/test_cpu_speed_mnist.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,45 @@ | ||||
| #!/bin/bash | ||||
| # CPU perf test for the MNIST example of pytorch/examples (branch "perftests"): | ||||
| # collects one runtime sample per one-epoch run and prints runtime stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_cpu_speed_mnist () { | ||||
|   echo "Testing: MNIST, CPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/mnist | ||||
|  | ||||
|   # NOTE(review): no -y flag is passed; presumably the CI environment makes | ||||
|   # conda non-interactive -- confirm. | ||||
|   conda install -c pytorch torchvision-cpu | ||||
|  | ||||
|   # Download the data with a zero-epoch run, outside the sampled loop below | ||||
|   python main.py --epochs 0 | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (examples/mnist). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_mnist "$@" | ||||
| fi | ||||
							
								
								
									
										29
									
								
								.ci/pytorch/perf_test/test_cpu_speed_torch.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								.ci/pytorch/perf_test/test_cpu_speed_torch.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,29 @@ | ||||
| #!/bin/bash | ||||
| # Runs the "torch.*, CPU" perf test by invoking modules/test_cpu_torch.py | ||||
| # from a fresh clone of yf225/perf-tests. | ||||
|  | ||||
| # run_test is not defined in this file; presumably it comes from common.sh, | ||||
| # which is sourced relative to the current directory -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  mode: "compare_with_baseline", "compare_and_update" or "update_only"; | ||||
| #       selects the --compare / --update options passed to the Python script. | ||||
| test_cpu_speed_torch () { | ||||
|   echo "Testing: torch.*, CPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/yf225/perf-tests.git | ||||
|  | ||||
|   # NOTE(review): bash does not export array variables to child processes, so | ||||
|   # `export` here effectively just assigns ARGS for the expansion below. ARGS | ||||
|   # is also left untouched when $1 matches none of the modes, so a value set | ||||
|   # earlier in the same shell would be reused -- confirm this is acceptable. | ||||
|   if [ "$1" == "compare_with_baseline" ]; then | ||||
|     export ARGS=(--compare ../cpu_runtime.json) | ||||
|   elif [ "$1" == "compare_and_update" ]; then | ||||
|     export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json) | ||||
|   elif [ "$1" == "update_only" ]; then | ||||
|     export ARGS=(--update ../new_cpu_runtime.json) | ||||
|   fi | ||||
|  | ||||
|   # A non-zero exit status from the Python script is reported with a | ||||
|   # reproduction hint, followed by `exit 1`. | ||||
|   if ! python perf-tests/modules/test_cpu_torch.py "${ARGS[@]}"; then | ||||
|     echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change." | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_torch "$@" | ||||
| fi | ||||
							
								
								
									
										29
									
								
								.ci/pytorch/perf_test/test_cpu_speed_torch_tensor.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								.ci/pytorch/perf_test/test_cpu_speed_torch_tensor.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,29 @@ | ||||
| #!/bin/bash | ||||
| # Runs the "torch.Tensor.*, CPU" perf test by invoking | ||||
| # modules/test_cpu_torch_tensor.py from a fresh clone of yf225/perf-tests. | ||||
|  | ||||
| # run_test is not defined in this file; presumably it comes from common.sh, | ||||
| # which is sourced relative to the current directory -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  mode: "compare_with_baseline", "compare_and_update" or "update_only"; | ||||
| #       selects the --compare / --update options passed to the Python script. | ||||
| test_cpu_speed_torch_tensor () { | ||||
|   echo "Testing: torch.Tensor.*, CPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/yf225/perf-tests.git | ||||
|  | ||||
|   # NOTE(review): bash does not export array variables to child processes, so | ||||
|   # `export` here effectively just assigns ARGS for the expansion below. ARGS | ||||
|   # is also left untouched when $1 matches none of the modes, so a value set | ||||
|   # earlier in the same shell would be reused -- confirm this is acceptable. | ||||
|   if [ "$1" == "compare_with_baseline" ]; then | ||||
|     export ARGS=(--compare ../cpu_runtime.json) | ||||
|   elif [ "$1" == "compare_and_update" ]; then | ||||
|     export ARGS=(--compare ../cpu_runtime.json --update ../new_cpu_runtime.json) | ||||
|   elif [ "$1" == "update_only" ]; then | ||||
|     export ARGS=(--update ../new_cpu_runtime.json) | ||||
|   fi | ||||
|  | ||||
|   # A non-zero exit status from the Python script is reported with a | ||||
|   # reproduction hint, followed by `exit 1`. | ||||
|   if ! python perf-tests/modules/test_cpu_torch_tensor.py "${ARGS[@]}"; then | ||||
|     echo "To reproduce this regression, run \`cd .ci/pytorch/perf_test/ && bash ${FUNCNAME[0]}.sh\` on your local machine and compare the runtime before/after your code change." | ||||
|     exit 1 | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_cpu_speed_torch_tensor "$@" | ||||
| fi | ||||
							
								
								
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_cudnn_lstm.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_cudnn_lstm.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | ||||
| #!/bin/bash | ||||
| # "CuDNN LSTM, GPU" perf test: collects one runtime sample per run of | ||||
| # scripts/cudnn_lstm.py from pytorch/benchmark and prints runtime stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_gpu_speed_cudnn_lstm () { | ||||
|   echo "Testing: CuDNN LSTM, GPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   # Pin the benchmark repository to a fixed commit. | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python cudnn_lstm.py --skip-cpu-governor-check) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (benchmark/scripts). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_cudnn_lstm "$@" | ||||
| fi | ||||
							
								
								
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_lstm.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_lstm.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | ||||
| #!/bin/bash | ||||
| # "LSTM, GPU" perf test: collects one runtime sample per run of | ||||
| # scripts/lstm.py from pytorch/benchmark and prints runtime stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_gpu_speed_lstm () { | ||||
|   echo "Testing: LSTM, GPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   # Pin the benchmark repository to a fixed commit. | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python lstm.py --skip-cpu-governor-check) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (benchmark/scripts). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_lstm "$@" | ||||
| fi | ||||
							
								
								
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_mlstm.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								.ci/pytorch/perf_test/test_gpu_speed_mlstm.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,44 @@ | ||||
| #!/bin/bash | ||||
| # "MLSTM, GPU" perf test: collects one runtime sample per run of | ||||
| # scripts/mlstm.py from pytorch/benchmark and prints runtime stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_gpu_speed_mlstm () { | ||||
|   echo "Testing: MLSTM, GPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/benchmark.git | ||||
|  | ||||
|   cd benchmark/ | ||||
|  | ||||
|   # Pin the benchmark repository to a fixed commit. | ||||
|   git checkout 43dfb2c0370e70ef37f249dc09aff9f0ccd2ddb0 | ||||
|  | ||||
|   cd scripts/ | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python mlstm.py --skip-cpu-governor-check) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (benchmark/scripts). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_mlstm "$@" | ||||
| fi | ||||
							
								
								
									
										48
									
								
								.ci/pytorch/perf_test/test_gpu_speed_mnist.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.ci/pytorch/perf_test/test_gpu_speed_mnist.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,48 @@ | ||||
| #!/bin/bash | ||||
| # "MNIST, GPU" perf test for the MNIST example of pytorch/examples (branch | ||||
| # "perftests"): collects one runtime sample per one-epoch run and prints stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_gpu_speed_mnist () { | ||||
|   echo "Testing: MNIST, GPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/mnist | ||||
|  | ||||
|   # NOTE(review): no -y flag is passed; presumably the CI environment makes | ||||
|   # conda non-interactive -- confirm. | ||||
|   conda install -c pytorch torchvision | ||||
|  | ||||
|   # Download the data with a zero-epoch run, outside the sampled loop below | ||||
|   python main.py --epochs 0 | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   # Do one unsampled warm-up run first so that the sampled runs give accurate numbers | ||||
|   python main.py --epochs 1 --no-log | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --epochs 1 --no-log) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (examples/mnist). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_mnist "$@" | ||||
| fi | ||||
							
								
								
									
										53
									
								
								.ci/pytorch/perf_test/test_gpu_speed_word_language_model.sh
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								.ci/pytorch/perf_test/test_gpu_speed_word_language_model.sh
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,53 @@ | ||||
| #!/bin/bash | ||||
| # "word language model on Wikitext-2, GPU" perf test for the | ||||
| # word_language_model example of pytorch/examples (branch "perftests"): | ||||
| # collects one runtime sample per one-epoch --cuda run and prints stats. | ||||
| # Abort on the first failing command. | ||||
| set -e | ||||
|  | ||||
| # Sourced relative to the current directory. run_test and | ||||
| # get_runtime_of_command are not defined in this file, so they presumably | ||||
| # come from common.sh -- confirm there. | ||||
| . ./common.sh | ||||
|  | ||||
| # Arguments: | ||||
| #   $1  number of sampled runs (stored in NUM_RUNS) | ||||
| #   $2  optional mode: "compare_with_baseline" or "compare_and_update"; | ||||
| #       with any other value the stats are only printed. | ||||
| test_gpu_speed_word_language_model () { | ||||
|   echo "Testing: word language model on Wikitext-2, GPU" | ||||
|  | ||||
|   # Fix the OpenMP and MKL thread counts to 4 for every run. | ||||
|   export OMP_NUM_THREADS=4 | ||||
|   export MKL_NUM_THREADS=4 | ||||
|  | ||||
|   git clone https://github.com/pytorch/examples.git -b perftests | ||||
|  | ||||
|   cd examples/word_language_model | ||||
|  | ||||
|   cd data/wikitext-2 | ||||
|  | ||||
|   # Reduce dataset size, so that we can have more runs per test: | ||||
|   # keep only the first 200 / 1000 / 200 lines of the test / train / valid files. | ||||
|   sed -n '1,200p' test.txt > test_tmp.txt | ||||
|   sed -n '1,1000p' train.txt > train_tmp.txt | ||||
|   sed -n '1,200p' valid.txt > valid_tmp.txt | ||||
|  | ||||
|   # Replace the original files with the truncated copies. | ||||
|   mv test_tmp.txt test.txt | ||||
|   mv train_tmp.txt train.txt | ||||
|   mv valid_tmp.txt valid.txt | ||||
|  | ||||
|   # Back to examples/word_language_model. | ||||
|   cd ../.. | ||||
|  | ||||
|   # NOTE(review): if $1 is empty, NUM_RUNS evaluates to 0 in the arithmetic | ||||
|   # loop below and no samples are collected -- confirm callers always pass it. | ||||
|   SAMPLE_ARRAY=() | ||||
|   NUM_RUNS=$1 | ||||
|  | ||||
|   for (( i=1; i<=NUM_RUNS; i++ )) do | ||||
|     runtime=$(get_runtime_of_command python main.py --cuda --epochs 1) | ||||
|     echo "$runtime" | ||||
|     SAMPLE_ARRAY+=("${runtime}") | ||||
|   done | ||||
|  | ||||
|   # Undo the two directory levels entered above (examples/word_language_model). | ||||
|   cd ../.. | ||||
|  | ||||
|   # get_stats.py and compare_with_baseline.py are referenced one level above | ||||
|   # the directory the test started in; presumably run_test runs the test from | ||||
|   # a subdirectory of perf_test -- confirm in common.sh. | ||||
|   stats=$(python ../get_stats.py "${SAMPLE_ARRAY[@]}") | ||||
|   echo "Runtime stats in seconds:" | ||||
|   echo "$stats" | ||||
|  | ||||
|   # --update is passed only in "compare_and_update" mode. | ||||
|   if [ "$2" == "compare_with_baseline" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" | ||||
|   elif [ "$2" == "compare_and_update" ]; then | ||||
|     python ../compare_with_baseline.py --test-name "${FUNCNAME[0]}" --sample-stats "${stats}" --update | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # Run the test only when this file is executed directly, not when it is sourced. | ||||
| if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then | ||||
|   run_test test_gpu_speed_word_language_model "$@" | ||||
| fi | ||||
							
								
								
									
										14
									
								
								.ci/pytorch/perf_test/update_commit_hash.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								.ci/pytorch/perf_test/update_commit_hash.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,14 @@ | ||||
| # Usage: python update_commit_hash.py <data_file_path> <commit_hash> | ||||
| # Loads the JSON file, sets its "commit" entry to the given hash and writes | ||||
| # the result back to the same path. | ||||
| import json | ||||
| import sys | ||||
|  | ||||
|  | ||||
| # Both positional arguments are required; a missing one raises IndexError. | ||||
| data_file_path = sys.argv[1] | ||||
| commit_hash = sys.argv[2] | ||||
|  | ||||
| with open(data_file_path) as data_file: | ||||
|     data = json.load(data_file) | ||||
|  | ||||
| # Assumes the file holds a JSON object (dict) at the top level -- TODO confirm. | ||||
| data["commit"] = commit_hash | ||||
|  | ||||
| # Overwrite the input file in place with the updated data. | ||||
| with open(data_file_path, "w") as data_file: | ||||
|     json.dump(data, data_file) | ||||
| @ -119,6 +119,12 @@ popd | ||||
| git rm -rf "$install_path" || true | ||||
| mv "$pt_checkout/docs/build/html" "$install_path" | ||||
|  | ||||
| # Prevent Google from indexing $install_path/_modules. This folder contains | ||||
| # generated source files. | ||||
| # NB: the following only works on GNU sed. The sed shipped with macOS is different. | ||||
| # One can `brew install gnu-sed` on a Mac and then use "gsed" instead of "sed". | ||||
| find "$install_path/_modules" -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">' | ||||
|  | ||||
| git add "$install_path" || true | ||||
| git status | ||||
| git config user.email "soumith+bot@pytorch.org" | ||||
|  | ||||
| @ -76,7 +76,7 @@ fi | ||||
| # Environment initialization | ||||
| if [[ "$(uname)" == Darwin ]]; then | ||||
|     # Install the testing dependencies | ||||
|     retry pip install -q future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml | ||||
|     retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml | ||||
| else | ||||
|     retry pip install -qr requirements.txt || true | ||||
|     retry pip install -q hypothesis protobuf pytest setuptools || true | ||||
| @ -91,6 +91,7 @@ fi | ||||
|  | ||||
| echo "Testing with:" | ||||
| pip freeze | ||||
| conda list || true | ||||
|  | ||||
| ############################################################################## | ||||
| # Smoke tests | ||||
|  | ||||
							
								
								
									
										71
									
								
								.ci/pytorch/short-perf-test-cpu.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										71
									
								
								.ci/pytorch/short-perf-test-cpu.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,71 @@ | ||||
| #!/bin/bash | ||||
| # Short CPU perf test driver: downloads the most recent CPU baseline found in | ||||
| # S3, runs the CPU perf tests against it and, when COMMIT_SOURCE equals the | ||||
| # upstream default branch, uploads a new baseline for the current commit. | ||||
|  | ||||
| SCRIPT_PARENT_DIR=$(dirname "${BASH_SOURCE[0]}") | ||||
|  | ||||
| # shellcheck source=.ci/pytorch/common.sh | ||||
| source "$SCRIPT_PARENT_DIR/common.sh" | ||||
|  | ||||
| # Relative to the caller's working directory (presumably the repository root -- confirm). | ||||
| cd .ci/pytorch/perf_test | ||||
|  | ||||
| echo "Running CPU perf test for PyTorch..." | ||||
|  | ||||
| pip install -q awscli | ||||
|  | ||||
| # Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read | ||||
| # More info at https://github.com/aws/aws-cli/issues/2321 | ||||
| aws configure set default.s3.multipart_threshold 5GB | ||||
| # Take the last field of the "HEAD branch" line printed by `git remote show`. | ||||
| UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')" | ||||
|  | ||||
| # COMMIT_SOURCE is not set in this script; it is expected from the environment. | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # Get current default branch commit hash | ||||
|     DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1) | ||||
|     export DEFAULT_BRANCH_COMMIT_ID | ||||
| fi | ||||
|  | ||||
| # Find the default branch commit to test against: walk the upstream default | ||||
| # branch in `git rev-list` order and stop at the first commit for which | ||||
| # `aws s3 ls` finds a baseline JSON. | ||||
| git remote add upstream https://github.com/pytorch/pytorch.git | ||||
| git fetch upstream | ||||
| # NOTE(review): this changes IFS for the rest of the script, while the `read` | ||||
| # below already sets its own IFS -- confirm the global change is intended. | ||||
| IFS=$'\n' | ||||
| while IFS='' read -r commit_id; do | ||||
|     if aws s3 ls s3://ossci-perf-test/pytorch/cpu_runtime/"${commit_id}".json; then | ||||
|         LATEST_TESTED_COMMIT=${commit_id} | ||||
|         break | ||||
|     fi | ||||
| done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH") | ||||
| # NOTE(review): if no commit matched, LATEST_TESTED_COMMIT is never assigned | ||||
| # here and this command asks for ".json"; this script itself does not enable | ||||
| # `set -e` before this point -- confirm whether a guard is needed. | ||||
| aws s3 cp s3://ossci-perf-test/pytorch/cpu_runtime/"${LATEST_TESTED_COMMIT}".json cpu_runtime.json | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # Prepare new baseline file: copy the downloaded baseline and stamp it | ||||
|     # with the current commit hash. | ||||
|     cp cpu_runtime.json new_cpu_runtime.json | ||||
|     python update_commit_hash.py new_cpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}" | ||||
| fi | ||||
|  | ||||
| # Include tests (the files are sourced from the current directory) | ||||
| # shellcheck source=./perf_test/test_cpu_speed_mini_sequence_labeler.sh | ||||
| . ./test_cpu_speed_mini_sequence_labeler.sh | ||||
| # shellcheck source=./perf_test/test_cpu_speed_mnist.sh | ||||
| . ./test_cpu_speed_mnist.sh | ||||
| # shellcheck source=./perf_test/test_cpu_speed_torch.sh | ||||
| . ./test_cpu_speed_torch.sh | ||||
| # shellcheck source=./perf_test/test_cpu_speed_torch_tensor.sh | ||||
| . ./test_cpu_speed_torch_tensor.sh | ||||
|  | ||||
| # Run tests: compare only by default; also update the baseline when | ||||
| # COMMIT_SOURCE equals the upstream default branch. | ||||
| export TEST_MODE="compare_with_baseline" | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     export TEST_MODE="compare_and_update" | ||||
| fi | ||||
|  | ||||
| # Operator tests (the mode is their only argument) | ||||
| run_test test_cpu_speed_torch ${TEST_MODE} | ||||
| run_test test_cpu_speed_torch_tensor ${TEST_MODE} | ||||
|  | ||||
| # Sample model tests (arguments: 20, then the mode) | ||||
| run_test test_cpu_speed_mini_sequence_labeler 20 ${TEST_MODE} | ||||
| run_test test_cpu_speed_mnist 20 ${TEST_MODE} | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # This could cause a race condition if we are testing the same default branch commit twice, | ||||
|     # but the chance of them executing this line at the same time is low. | ||||
|     aws s3 cp new_cpu_runtime.json s3://ossci-perf-test/pytorch/cpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read | ||||
| fi | ||||
							
								
								
									
										76
									
								
								.ci/pytorch/short-perf-test-gpu.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										76
									
								
								.ci/pytorch/short-perf-test-gpu.sh
									
									
									
									
									
										Executable file
									
								
							| @ -0,0 +1,76 @@ | ||||
| #!/bin/bash | ||||
| # Short GPU perf test driver: downloads the most recent GPU baseline found in | ||||
| # S3, runs the GPU perf tests against it and, when COMMIT_SOURCE equals the | ||||
| # upstream default branch, uploads a new baseline for the current commit. | ||||
|  | ||||
| # shellcheck source=./common.sh | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common.sh" | ||||
|  | ||||
| # Relative to the caller's working directory (presumably the repository root | ||||
| # -- confirm); undone by the popd at the end of the script. | ||||
| pushd .ci/pytorch/perf_test | ||||
|  | ||||
| echo "Running GPU perf test for PyTorch..." | ||||
|  | ||||
| # Trying to uninstall PyYAML can cause problems. Workaround according to: | ||||
| # https://github.com/pypa/pip/issues/5247#issuecomment-415571153 | ||||
| pip install -q awscli --ignore-installed PyYAML | ||||
|  | ||||
| # Set multipart_threshold to be sufficiently high, so that `aws s3 cp` is not a multipart read | ||||
| # More info at https://github.com/aws/aws-cli/issues/2321 | ||||
| aws configure set default.s3.multipart_threshold 5GB | ||||
| # Take the last field of the "HEAD branch" line printed by `git remote show`. | ||||
| UPSTREAM_DEFAULT_BRANCH="$(git remote show https://github.com/pytorch/pytorch.git | awk '/HEAD branch/ {print $NF}')" | ||||
|  | ||||
| # COMMIT_SOURCE is not set in this script; it is expected from the environment. | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # Get current default branch commit hash | ||||
|     DEFAULT_BRANCH_COMMIT_ID=$(git log --format="%H" -n 1) | ||||
|     export DEFAULT_BRANCH_COMMIT_ID | ||||
| fi | ||||
|  | ||||
| # Find the default branch commit to test against: walk the upstream default | ||||
| # branch in `git rev-list` order and stop at the first commit for which | ||||
| # `aws s3 ls` finds a baseline JSON. | ||||
| git remote add upstream https://github.com/pytorch/pytorch.git | ||||
| git fetch upstream | ||||
| # NOTE(review): this changes IFS for the rest of the script, while the `read` | ||||
| # below already sets its own IFS -- confirm the global change is intended. | ||||
| IFS=$'\n' | ||||
| while IFS='' read -r commit_id; do | ||||
|     if aws s3 ls s3://ossci-perf-test/pytorch/gpu_runtime/"${commit_id}".json; then | ||||
|         LATEST_TESTED_COMMIT=${commit_id} | ||||
|         break | ||||
|     fi | ||||
| done < <(git rev-list upstream/"$UPSTREAM_DEFAULT_BRANCH") | ||||
| # NOTE(review): if no commit matched, LATEST_TESTED_COMMIT is never assigned | ||||
| # here and this command asks for ".json"; this script itself does not enable | ||||
| # `set -e` before this point -- confirm whether a guard is needed. | ||||
| aws s3 cp s3://ossci-perf-test/pytorch/gpu_runtime/"${LATEST_TESTED_COMMIT}".json gpu_runtime.json | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # Prepare new baseline file: copy the downloaded baseline and stamp it | ||||
|     # with the current commit hash. | ||||
|     cp gpu_runtime.json new_gpu_runtime.json | ||||
|     python update_commit_hash.py new_gpu_runtime.json "${DEFAULT_BRANCH_COMMIT_ID}" | ||||
| fi | ||||
|  | ||||
| # Include tests (the files are sourced from the current directory) | ||||
| # shellcheck source=./perf_test/test_gpu_speed_mnist.sh | ||||
| . ./test_gpu_speed_mnist.sh | ||||
| # shellcheck source=./perf_test/test_gpu_speed_word_language_model.sh | ||||
| . ./test_gpu_speed_word_language_model.sh | ||||
| # shellcheck source=./perf_test/test_gpu_speed_cudnn_lstm.sh | ||||
| . ./test_gpu_speed_cudnn_lstm.sh | ||||
| # shellcheck source=./perf_test/test_gpu_speed_lstm.sh | ||||
| . ./test_gpu_speed_lstm.sh | ||||
| # shellcheck source=./perf_test/test_gpu_speed_mlstm.sh | ||||
| . ./test_gpu_speed_mlstm.sh | ||||
|  | ||||
| # Run tests (arguments: 20, then the mode). The two branches differ only in | ||||
| # the mode: compare_and_update when COMMIT_SOURCE equals the upstream default | ||||
| # branch, compare_with_baseline otherwise. | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     run_test test_gpu_speed_mnist 20 compare_and_update | ||||
|     run_test test_gpu_speed_word_language_model 20 compare_and_update | ||||
|     run_test test_gpu_speed_cudnn_lstm 20 compare_and_update | ||||
|     run_test test_gpu_speed_lstm 20 compare_and_update | ||||
|     run_test test_gpu_speed_mlstm 20 compare_and_update | ||||
| else | ||||
|     run_test test_gpu_speed_mnist 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_word_language_model 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_cudnn_lstm 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_lstm 20 compare_with_baseline | ||||
|     run_test test_gpu_speed_mlstm 20 compare_with_baseline | ||||
| fi | ||||
|  | ||||
| if [[ "$COMMIT_SOURCE" == "$UPSTREAM_DEFAULT_BRANCH" ]]; then | ||||
|     # This could cause a race condition if we are testing the same default branch commit twice, | ||||
|     # but the chance of them executing this line at the same time is low. | ||||
|     aws s3 cp new_gpu_runtime.json s3://ossci-perf-test/pytorch/gpu_runtime/"${DEFAULT_BRANCH_COMMIT_ID}".json --acl public-read | ||||
| fi | ||||
|  | ||||
| popd | ||||
| @ -80,7 +80,7 @@ def grep_symbols(lib: str, patterns: list[Any]) -> list[str]: | ||||
|         return functools.reduce(list.__add__, (x.result() for x in tasks), []) | ||||
|  | ||||
|  | ||||
| def check_lib_symbols_for_abi_correctness(lib: str) -> None: | ||||
| def check_lib_symbols_for_abi_correctness(lib: str, pre_cxx11_abi: bool = True) -> None: | ||||
|     print(f"lib: {lib}") | ||||
|     cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS) | ||||
|     pre_cxx11_symbols = grep_symbols(lib, LIBTORCH_PRE_CXX11_PATTERNS) | ||||
| @ -88,12 +88,28 @@ def check_lib_symbols_for_abi_correctness(lib: str) -> None: | ||||
|     num_pre_cxx11_symbols = len(pre_cxx11_symbols) | ||||
|     print(f"num_cxx11_symbols: {num_cxx11_symbols}") | ||||
|     print(f"num_pre_cxx11_symbols: {num_pre_cxx11_symbols}") | ||||
|     if num_pre_cxx11_symbols > 0: | ||||
|         raise RuntimeError( | ||||
|             f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_symbols[:100]}" | ||||
|     if pre_cxx11_abi: | ||||
|         if num_cxx11_symbols > 0: | ||||
|             raise RuntimeError( | ||||
|                 f"Found cxx11 symbols, but there shouldn't be any, see: {cxx11_symbols[:100]}" | ||||
|             ) | ||||
|         if num_pre_cxx11_symbols < 1000: | ||||
|             raise RuntimeError("Didn't find enough pre-cxx11 symbols.") | ||||
|         # Check for no recursive iterators, regression test for https://github.com/pytorch/pytorch/issues/133437 | ||||
|         rec_iter_symbols = grep_symbols( | ||||
|             lib, [re.compile("std::filesystem::recursive_directory_iterator.*")] | ||||
|         ) | ||||
|     if num_cxx11_symbols < 100: | ||||
|         raise RuntimeError("Didn't find enought cxx11 symbols") | ||||
|         if len(rec_iter_symbols) > 0: | ||||
|             raise RuntimeError( | ||||
|                 f"recursive_directory_iterator in used pre-CXX11 binaries, see; {rec_iter_symbols}" | ||||
|             ) | ||||
|     else: | ||||
|         if num_pre_cxx11_symbols > 0: | ||||
|             raise RuntimeError( | ||||
|                 f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_symbols[:100]}" | ||||
|             ) | ||||
|         if num_cxx11_symbols < 100: | ||||
|             raise RuntimeError("Didn't find enought cxx11 symbols") | ||||
|  | ||||
|  | ||||
| def main() -> None: | ||||
| @ -105,8 +121,9 @@ def main() -> None: | ||||
|         else: | ||||
|             install_root = Path(distutils.sysconfig.get_python_lib()) / "torch" | ||||
|  | ||||
|     libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so") | ||||
|     check_lib_symbols_for_abi_correctness(libtorch_cpu_path) | ||||
|     libtorch_cpu_path = install_root / "lib" / "libtorch_cpu.so" | ||||
|     pre_cxx11_abi = "cxx11-abi" not in os.getenv("DESIRED_DEVTOOLSET", "") | ||||
|     check_lib_symbols_for_abi_correctness(libtorch_cpu_path, pre_cxx11_abi) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|  | ||||
| @ -1,74 +0,0 @@ | ||||
| import ctypes | ||||
| import os | ||||
| import sys | ||||
| from pathlib import Path | ||||
|  | ||||
|  | ||||
| def get_gomp_thread(): | ||||
|     """ | ||||
|     Retrieves the maximum number of OpenMP threads after loading the `libgomp.so.1` library | ||||
|     and the `libtorch_cpu.so` library. It then queries the | ||||
|     maximum number of threads available for OpenMP parallel regions using the | ||||
|     `omp_get_max_threads` function, looked up through the `libtorch_cpu.so` handle. | ||||
|  | ||||
|     Returns: | ||||
|         int: The maximum number of OpenMP threads available. | ||||
|  | ||||
|     Notes: | ||||
|         - The function assumes the default path for `libgomp.so.1` on AlmaLinux OS. | ||||
|         - The path to `libtorch_cpu.so` is constructed based on the Python executable's | ||||
|           installation directory. | ||||
|         - As a side effect, `GOMP_CPU_AFFINITY` is set to "0-3" in `os.environ` | ||||
|           before either library is loaded. | ||||
|         - This function is specific to environments where PyTorch and OpenMP are used | ||||
|           together and may require adjustments for other setups. | ||||
|     """ | ||||
|     python_path = Path(sys.executable).resolve() | ||||
|     python_prefix = ( | ||||
|         python_path.parent.parent | ||||
|     )  # Typically goes to the Python installation root | ||||
|  | ||||
|     # Get the additional ABI flags (if any); it may be an empty string. | ||||
|     abiflags = getattr(sys, "abiflags", "") | ||||
|  | ||||
|     # Construct the Python directory name correctly (e.g., "python3.13t"). | ||||
|     python_version = ( | ||||
|         f"python{sys.version_info.major}.{sys.version_info.minor}{abiflags}" | ||||
|     ) | ||||
|  | ||||
|     # <prefix>/lib/<python_version>/site-packages/torch/lib/libtorch_cpu.so | ||||
|     libtorch_cpu_path = ( | ||||
|         python_prefix | ||||
|         / "lib" | ||||
|         / python_version | ||||
|         / "site-packages" | ||||
|         / "torch" | ||||
|         / "lib" | ||||
|         / "libtorch_cpu.so" | ||||
|     ) | ||||
|  | ||||
|     # use the default gomp path of AlmaLinux OS | ||||
|     libgomp_path = "/usr/lib64/libgomp.so.1" | ||||
|  | ||||
|     # Set before the libraries are loaded; presumably so the OpenMP runtime | ||||
|     # picks it up on initialization -- confirm. | ||||
|     os.environ["GOMP_CPU_AFFINITY"] = "0-3" | ||||
|  | ||||
|     # Load libgomp.so.1 first, then libtorch_cpu.so. The name `libgomp` is | ||||
|     # rebound to the libtorch_cpu.so handle, so omp_get_max_threads below is | ||||
|     # looked up through that second handle. | ||||
|     libgomp = ctypes.CDLL(libgomp_path) | ||||
|     libgomp = ctypes.CDLL(libtorch_cpu_path) | ||||
|  | ||||
|     # Declare the C signature: int omp_get_max_threads(void). | ||||
|     libgomp.omp_get_max_threads.restype = ctypes.c_int | ||||
|     libgomp.omp_get_max_threads.argtypes = [] | ||||
|  | ||||
|     omp_max_threads = libgomp.omp_get_max_threads() | ||||
|     return omp_max_threads | ||||
|  | ||||
|  | ||||
| def main(): | ||||
|     """Print the OpenMP max thread count and raise RuntimeError if it is 1.""" | ||||
|     omp_max_threads = get_gomp_thread() | ||||
|     print( | ||||
|         f"omp_max_threads after loading libgomp.so and libtorch_cpu.so: {omp_max_threads}" | ||||
|     ) | ||||
|     # A value of 1 is treated as a failure; the error message points at | ||||
|     # libgomp.so being loaded twice as the thing to check. | ||||
|     if omp_max_threads == 1: | ||||
|         raise RuntimeError( | ||||
|             "omp_max_threads is 1. Check whether libgomp.so is loaded twice." | ||||
|         ) | ||||
|  | ||||
|  | ||||
| # Run the check only when executed as a script. | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| @ -7,7 +7,6 @@ import subprocess | ||||
| import sys | ||||
| from pathlib import Path | ||||
| from tempfile import NamedTemporaryFile | ||||
| from typing import Optional | ||||
|  | ||||
| import torch | ||||
| import torch._dynamo | ||||
| @ -77,13 +76,10 @@ def read_release_matrix(): | ||||
|  | ||||
|  | ||||
| def test_numpy(): | ||||
|     try: | ||||
|         import numpy as np | ||||
|     import numpy as np | ||||
|  | ||||
|         x = np.arange(5) | ||||
|         torch.tensor(x) | ||||
|     except ImportError: | ||||
|         print("Numpy check skipped. Numpy is not installed.") | ||||
|     x = np.arange(5) | ||||
|     torch.tensor(x) | ||||
|  | ||||
|  | ||||
| def check_version(package: str) -> None: | ||||
| @ -196,41 +192,8 @@ def test_cuda_gds_errors_captured() -> None: | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def find_pypi_package_version(package: str) -> Optional[str]: | ||||
|     from importlib import metadata | ||||
|  | ||||
|     dists = metadata.distributions() | ||||
|     for dist in dists: | ||||
|         if dist.metadata["Name"].startswith(package): | ||||
|             return dist.version | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def cudnn_to_version_str(cudnn_version: int) -> str: | ||||
|     patch = int(cudnn_version % 10) | ||||
|     minor = int((cudnn_version / 100) % 100) | ||||
|     major = int((cudnn_version / 10000) % 10000) | ||||
|     return f"{major}.{minor}.{patch}" | ||||
|  | ||||
|  | ||||
| def compare_pypi_to_torch_versions( | ||||
|     package: str, pypi_version: str, torch_version: str | ||||
| ) -> None: | ||||
|     if pypi_version is None: | ||||
|         raise RuntimeError(f"Can't find {package} in PyPI for Torch: {torch_version}") | ||||
|     if pypi_version.startswith(torch_version): | ||||
|         print(f"Found matching {package}. Torch: {torch_version} PyPI {pypi_version}") | ||||
|     else: | ||||
|         raise RuntimeError( | ||||
|             f"Wrong {package} version. Torch: {torch_version} PyPI: {pypi_version}" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| def smoke_test_cuda( | ||||
|     package: str, | ||||
|     runtime_error_check: str, | ||||
|     torch_compile_check: str, | ||||
|     pypi_pkg_check: str, | ||||
|     package: str, runtime_error_check: str, torch_compile_check: str | ||||
| ) -> None: | ||||
|     if not torch.cuda.is_available() and is_cuda_system: | ||||
|         raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.") | ||||
| @ -260,30 +223,20 @@ def smoke_test_cuda( | ||||
|             raise RuntimeError( | ||||
|                 f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}" | ||||
|             ) | ||||
|  | ||||
|         print(f"torch cuda: {torch.version.cuda}") | ||||
|         # todo add cudnn version validation | ||||
|         print(f"torch cudnn: {torch.backends.cudnn.version()}") | ||||
|         print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") | ||||
|  | ||||
|         torch.cuda.init() | ||||
|         print("CUDA initialized successfully") | ||||
|         print(f"Number of CUDA devices: {torch.cuda.device_count()}") | ||||
|         for i in range(torch.cuda.device_count()): | ||||
|             print(f"Device {i}: {torch.cuda.get_device_name(i)}") | ||||
|  | ||||
|         print(f"cuDNN enabled? {torch.backends.cudnn.enabled}") | ||||
|         torch_cudnn_version = cudnn_to_version_str(torch.backends.cudnn.version()) | ||||
|         print(f"Torch cuDNN version: {torch_cudnn_version}") | ||||
|  | ||||
|         # nccl is availbale only on Linux | ||||
|         if sys.platform in ["linux", "linux2"]: | ||||
|             torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version()) | ||||
|             print(f"Torch nccl; version: {torch_nccl_version}") | ||||
|  | ||||
|         # Pypi dependencies are installed on linux ony and nccl is availbale only on Linux. | ||||
|         if pypi_pkg_check == "enabled" and sys.platform in ["linux", "linux2"]: | ||||
|             compare_pypi_to_torch_versions( | ||||
|                 "cudnn", find_pypi_package_version("nvidia-cudnn"), torch_cudnn_version | ||||
|             ) | ||||
|             compare_pypi_to_torch_versions( | ||||
|                 "nccl", find_pypi_package_version("nvidia-nccl"), torch_nccl_version | ||||
|             ) | ||||
|             print(f"torch nccl version: {torch.cuda.nccl.version()}") | ||||
|  | ||||
|         if runtime_error_check == "enabled": | ||||
|             test_cuda_runtime_errors_captured() | ||||
| @ -442,13 +395,6 @@ def parse_args(): | ||||
|         choices=["enabled", "disabled"], | ||||
|         default="enabled", | ||||
|     ) | ||||
|     parser.add_argument( | ||||
|         "--pypi-pkg-check", | ||||
|         help="Check pypi package versions cudnn and nccl", | ||||
|         type=str, | ||||
|         choices=["enabled", "disabled"], | ||||
|         default="enabled", | ||||
|     ) | ||||
|     return parser.parse_args() | ||||
|  | ||||
|  | ||||
| @ -464,7 +410,6 @@ def main() -> None: | ||||
|     smoke_test_conv2d() | ||||
|     test_linalg() | ||||
|     test_numpy() | ||||
|  | ||||
|     if is_cuda_system: | ||||
|         test_linalg("cuda") | ||||
|         test_cuda_gds_errors_captured() | ||||
| @ -473,10 +418,7 @@ def main() -> None: | ||||
|         smoke_test_modules() | ||||
|  | ||||
|     smoke_test_cuda( | ||||
|         options.package, | ||||
|         options.runtime_error_check, | ||||
|         options.torch_compile_check, | ||||
|         options.pypi_pkg_check, | ||||
|         options.package, options.runtime_error_check, options.torch_compile_check | ||||
|     ) | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -191,10 +191,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|     # shellcheck disable=SC1091 | ||||
|     source /opt/intel/oneapi/umf/latest/env/vars.sh | ||||
|   fi | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/ccl/latest/env/vars.sh | ||||
|   # shellcheck disable=SC1091 | ||||
|   source /opt/intel/oneapi/mpi/latest/env/vars.sh | ||||
|   # Check XPU status before testing | ||||
|   xpu-smi discovery | ||||
| fi | ||||
| @ -318,12 +314,6 @@ test_python() { | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_python_smoke() { | ||||
|   # Smoke tests for H100 | ||||
|   time python test/run_test.py --include test_matmul_cuda inductor/test_fp8 inductor/test_max_autotune $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running | ||||
|   assert_git_not_dirty | ||||
| } | ||||
|  | ||||
| test_lazy_tensor_meta_reference_disabled() { | ||||
|   export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1 | ||||
|   echo "Testing lazy tensor operations without meta reference" | ||||
| @ -408,15 +398,8 @@ test_inductor_aoti() { | ||||
|     # We need to hipify before building again | ||||
|     python3 tools/amd_build/build_amd.py | ||||
|   fi | ||||
|   if [[ "$BUILD_ENVIRONMENT" == *sm86* ]]; then | ||||
|     BUILD_AOT_INDUCTOR_TEST=1 TORCH_CUDA_ARCH_LIST=8.6 USE_FLASH_ATTENTION=OFF python setup.py develop | ||||
|     # TODO: Replace me completely, as one should not use conda libstdc++, nor need special path to TORCH_LIB | ||||
|     LD_LIBRARY_PATH=/opt/conda/envs/py_3.10/lib/:${TORCH_LIB_DIR}:$LD_LIBRARY_PATH | ||||
|     CPP_TESTS_DIR="${BUILD_BIN_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile | ||||
|   else | ||||
|     BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop | ||||
|     CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference -dist=loadfile | ||||
|   fi | ||||
|   BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop | ||||
|   CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference | ||||
| } | ||||
|  | ||||
| test_inductor_cpp_wrapper_shard() { | ||||
| @ -431,11 +414,10 @@ test_inductor_cpp_wrapper_shard() { | ||||
|  | ||||
|   if [[ "$1" -eq "2" ]]; then | ||||
|     # For now, manually put the opinfo tests in shard 2, and all other tests in | ||||
|     # shard 1.  Run all CPU tests, as well as specific GPU tests triggering past | ||||
|     # bugs, for now. | ||||
|     # shard 1.  Test specific things triggering past bugs, for now. | ||||
|     python test/run_test.py \ | ||||
|       --include inductor/test_torchinductor_opinfo \ | ||||
|       -k 'linalg or to_sparse or TestInductorOpInfoCPU' \ | ||||
|       -k 'linalg or to_sparse' \ | ||||
|       --verbose | ||||
|     exit | ||||
|   fi | ||||
| @ -820,7 +802,16 @@ test_inductor_torchbench_smoketest_perf() { | ||||
|   done | ||||
| } | ||||
|  | ||||
| test_inductor_get_core_number() { | ||||
|   if [[ "${TEST_CONFIG}" == *aarch64* ]]; then | ||||
|     echo "$(($(lscpu | grep 'Cluster(s):' | awk '{print $2}') * $(lscpu | grep 'Core(s) per cluster:' | awk '{print $4}')))" | ||||
|   else | ||||
|     echo "$(($(lscpu | grep 'Socket(s):' | awk '{print $2}') * $(lscpu | grep 'Core(s) per socket:' | awk '{print $4}')))" | ||||
|   fi | ||||
| } | ||||
|  | ||||
| test_inductor_set_cpu_affinity(){ | ||||
|   #set jemalloc | ||||
|   JEMALLOC_LIB="$(find /usr/lib -name libjemalloc.so.2)" | ||||
|   export LD_PRELOAD="$JEMALLOC_LIB":"$LD_PRELOAD" | ||||
|   export MALLOC_CONF="oversize_threshold:1,background_thread:true,metadata_thp:auto,dirty_decay_ms:-1,muzzy_decay_ms:-1" | ||||
| @ -832,23 +823,14 @@ test_inductor_set_cpu_affinity(){ | ||||
|     export KMP_AFFINITY=granularity=fine,compact,1,0 | ||||
|     export KMP_BLOCKTIME=1 | ||||
|   fi | ||||
|  | ||||
|   # Use nproc here instead of lscpu because it takes into account cgroups slice | ||||
|   cpus=$(nproc) | ||||
|   thread_per_core=$(lscpu | grep 'Thread(s) per core:' | awk '{print $4}') | ||||
|   cores=$((cpus / thread_per_core)) | ||||
|  | ||||
|   # Set number of cores to 16 on aarch64 for performance runs | ||||
|   cores=$(test_inductor_get_core_number) | ||||
|   # Set number of cores to 16 on Aarch64 for performance runs. | ||||
|   if [[ "${TEST_CONFIG}" == *aarch64* && $cores -gt 16 ]]; then | ||||
|     cores=16 | ||||
|   fi | ||||
|   export OMP_NUM_THREADS=$cores | ||||
|  | ||||
|   # Handle cgroups slice start and end CPU | ||||
|   start_cpu=$(python -c 'import os; print(min(os.sched_getaffinity(0)))') | ||||
|   # Leaving one physical CPU for other tasks | ||||
|   end_cpu=$(($(python -c 'import os; print(max(os.sched_getaffinity(0)))') - thread_per_core)) | ||||
|   export TASKSET="taskset -c $start_cpu-$end_cpu" | ||||
|   end_core=$((cores-1)) | ||||
|   export TASKSET="taskset -c 0-$end_core" | ||||
| } | ||||
|  | ||||
| test_inductor_torchbench_cpu_smoketest_perf(){ | ||||
| @ -1191,7 +1173,7 @@ build_xla() { | ||||
|   apply_patches | ||||
|   SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')" | ||||
|   # These functions are defined in .circleci/common.sh in pytorch/xla repo | ||||
|   retry install_pre_deps_pytorch_xla $XLA_DIR $USE_CACHE | ||||
|   retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE | ||||
|   CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR | ||||
|   assert_git_not_dirty | ||||
| } | ||||
| @ -1492,11 +1474,14 @@ test_executorch() { | ||||
|   pushd /executorch | ||||
|  | ||||
|   export PYTHON_EXECUTABLE=python | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|   export EXECUTORCH_BUILD_PYBIND=ON | ||||
|   export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON" | ||||
|  | ||||
|   # For llama3 | ||||
|   bash examples/models/llama3_2_vision/install_requirements.sh | ||||
|   # NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch | ||||
|   # from the PR | ||||
|   bash .ci/scripts/setup-linux.sh --build-tool cmake | ||||
|   bash .ci/scripts/setup-linux.sh cmake | ||||
|  | ||||
|   echo "Run ExecuTorch unit tests" | ||||
|   pytest -v -n auto | ||||
| @ -1536,33 +1521,12 @@ test_linux_aarch64() { | ||||
|        inductor/test_inplacing_pass inductor/test_kernel_benchmark inductor/test_layout_optim \ | ||||
|        inductor/test_max_autotune inductor/test_memory_planning inductor/test_metrics inductor/test_multi_kernel inductor/test_pad_mm \ | ||||
|        inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \ | ||||
|        inductor/test_split_cat_fx_passes inductor/test_compile inductor/test_torchinductor \ | ||||
|        inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \ | ||||
|        inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \ | ||||
|        inductor/test_triton_cpu_backend inductor/test_triton_extension_backend inductor/test_mkldnn_pattern_matcher inductor/test_cpu_cpp_wrapper \ | ||||
|        --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose | ||||
| } | ||||
|  | ||||
| test_operator_benchmark() { | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|   TEST_DIR=$(pwd) | ||||
|  | ||||
|   test_inductor_set_cpu_affinity | ||||
|  | ||||
|   cd benchmarks/operator_benchmark/pt_extension | ||||
|   python setup.py install | ||||
|  | ||||
|   cd "${TEST_DIR}"/benchmarks/operator_benchmark | ||||
|   $TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \ | ||||
|       --output-dir "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" | ||||
|  | ||||
|   pip_install pandas | ||||
|   python check_perf_csv.py \ | ||||
|       --actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \ | ||||
|       --expected "expected_ci_operator_benchmark_eager_float32_cpu.csv" | ||||
| } | ||||
|  | ||||
|  | ||||
| if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then | ||||
|   (cd test && python -c "import torch; print(torch.__config__.show())") | ||||
|   (cd test && python -c "import torch; print(torch.__config__.parallel_info())") | ||||
| @ -1593,19 +1557,6 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then | ||||
|   if [[ "${SHARD_NUMBER}" == 1 ]]; then | ||||
|     test_rpc | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then | ||||
|   TEST_MODE="short" | ||||
|  | ||||
|   if [[ "${TEST_CONFIG}" == *cpu* ]]; then | ||||
|     if [[ "${TEST_CONFIG}" == *long* ]]; then | ||||
|       TEST_MODE="long" | ||||
|     elif [[ "${TEST_CONFIG}" == *all* ]]; then | ||||
|       TEST_MODE="all" | ||||
|     fi | ||||
|  | ||||
|     test_operator_benchmark cpu ${TEST_MODE} | ||||
|  | ||||
|   fi | ||||
| elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then | ||||
|   test_inductor_distributed | ||||
| elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then | ||||
| @ -1668,7 +1619,6 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then | ||||
|   install_torchvision | ||||
|   checkout_install_torchbench hf_T5 llama moco | ||||
|   PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER" | ||||
|   test_inductor_aoti | ||||
| elif [[ "${TEST_CONFIG}" == *inductor* ]]; then | ||||
|   install_torchvision | ||||
|   test_inductor_shard "${SHARD_NUMBER}" | ||||
| @ -1722,8 +1672,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then | ||||
|   test_python | ||||
|   test_aten | ||||
|   test_xpu_bin | ||||
| elif [[ "${TEST_CONFIG}" == smoke ]]; then | ||||
|   test_python_smoke | ||||
| else | ||||
|   install_torchvision | ||||
|   install_monkeytype | ||||
|  | ||||
| @ -37,11 +37,6 @@ call %INSTALLER_DIR%\activate_miniconda3.bat | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
|  | ||||
| :: Update CMake | ||||
| call choco upgrade -y cmake --no-progress --installargs 'ADD_CMAKE_TO_PATH=System' --apply-install-arguments-to-dependencies --version=3.27.9 | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
|  | ||||
| call pip install mkl-include==2021.4.0 mkl-devel==2021.4.0 | ||||
| if errorlevel 1 goto fail | ||||
| if not errorlevel 0 goto fail | ||||
| @ -93,7 +88,7 @@ set PATH=%CUDA_PATH%\bin;%CUDA_PATH%\libnvvp;%PATH% | ||||
| :cuda_build_end | ||||
|  | ||||
| set DISTUTILS_USE_SDK=1 | ||||
| set PATH=%TMP_DIR_WIN%\bin;C:\Program Files\CMake\bin;%PATH% | ||||
| set PATH=%TMP_DIR_WIN%\bin;%PATH% | ||||
|  | ||||
| :: The latest Windows CUDA test is running on AWS G5 runner with A10G GPU | ||||
| if "%TORCH_CUDA_ARCH_LIST%" == "" set TORCH_CUDA_ARCH_LIST=8.6 | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	