Compare commits

..

1 Commits

Author SHA1 Message Date
fce58a0466 [Dynamo] initial foreach_op impl 2024-12-02 17:18:58 -08:00
4148 changed files with 79492 additions and 181963 deletions

View File

@ -3,15 +3,22 @@ set -eux -o pipefail
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
export TORCH_CUDA_ARCH_LIST="9.0"
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
export TORCH_CUDA_ARCH_LIST="9.0;10.0;12.0"
fi
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
source $SCRIPTPATH/aarch64_ci_setup.sh
# Print the `git describe` result for /pytorch when HEAD sits exactly on a tag
# matching v[0-9]*.[0-9]*.[0-9]*; otherwise print nothing on stdout and return 1.
tagged_version() {
# The command is stored as a string and expanded unquoted below so that it
# word-splits into a command plus arguments.
# NOTE(review): the unquoted expansion is also subject to pathname expansion of
# the v[0-9]*... pattern if a matching file exists in the current directory.
GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
# Probe only: stdout is discarded and just the exit status is used.
# (--exact is presumably accepted by git as an abbreviation of --exact-match.)
if ${GIT_DESCRIBE} --exact >/dev/null; then
# Run again without discarding stdout so the caller can capture the tag.
${GIT_DESCRIBE}
else
return 1
fi
}
if tagged_version >/dev/null; then
export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')"
fi
###############################################################################
# Run aarch64 builder python
###############################################################################

View File

@ -5,14 +5,16 @@ set -eux -o pipefail
# By creating symlinks from desired /opt/python to /usr/local/bin/
NUMPY_VERSION=2.0.2
if [[ "$DESIRED_PYTHON" == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then
PYGIT2_VERSION=1.15.1
if [[ "$DESIRED_PYTHON" == "3.13" ]]; then
NUMPY_VERSION=2.1.2
PYGIT2_VERSION=1.16.0
fi
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
source $SCRIPTPATH/../manywheel/set_desired_python.sh
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION}
for tool in python python3 pip pip3 ninja scons patchelf; do
ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;

View File

@ -4,9 +4,12 @@
import os
import shutil
from subprocess import check_call, check_output
from typing import List
from pygit2 import Repository
def list_dir(path: str) -> list[str]:
def list_dir(path: str) -> List[str]:
"""'
Helper for getting paths for Python
"""
@ -55,7 +58,7 @@ def build_ArmComputeLibrary() -> None:
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
def update_wheel(wheel_path, desired_cuda) -> None:
def update_wheel(wheel_path) -> None:
"""
Update the cuda wheel libraries
"""
@ -77,6 +80,7 @@ def update_wheel(wheel_path, desired_cuda) -> None:
"/usr/local/cuda/lib64/libnvToolsExt.so.1",
"/usr/local/cuda/lib64/libnvJitLink.so.12",
"/usr/local/cuda/lib64/libnvrtc.so.12",
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
"/usr/local/cuda/lib64/libcudnn_adv.so.9",
"/usr/local/cuda/lib64/libcudnn_cnn.so.9",
"/usr/local/cuda/lib64/libcudnn_graph.so.9",
@ -96,14 +100,6 @@ def update_wheel(wheel_path, desired_cuda) -> None:
"/usr/local/lib/libnvpl_lapack_core.so.0",
"/usr/local/lib/libnvpl_blas_core.so.0",
]
if "126" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
]
elif "128" in desired_cuda:
libs_to_copy += [
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
]
else:
libs_to_copy += [
"/opt/OpenBLAS/lib/libopenblas.so.0",
@ -175,22 +171,22 @@ if __name__ == "__main__":
args = parse_arguments()
enable_mkldnn = args.enable_mkldnn
enable_cuda = args.enable_cuda
branch = check_output(
["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch"
).decode()
repo = Repository("/pytorch")
branch = repo.head.name
if branch == "HEAD":
branch = "master"
print("Building PyTorch wheel")
build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
os.system("cd /pytorch; python setup.py clean")
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
desired_cuda = os.getenv("DESIRED_CUDA")
if override_package_version is not None:
version = override_package_version
build_vars += (
f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
)
elif branch in ["nightly", "main"]:
elif branch in ["nightly", "master"]:
build_date = (
check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
.decode()
@ -200,6 +196,7 @@ if __name__ == "__main__":
check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
)
if enable_cuda:
desired_cuda = os.getenv("DESIRED_CUDA")
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 "
else:
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
@ -228,6 +225,6 @@ if __name__ == "__main__":
print("Updating Cuda Dependency")
filename = os.listdir("/pytorch/dist/")
wheel_path = f"/pytorch/dist/{filename[0]}"
update_wheel(wheel_path, desired_cuda)
update_wheel(wheel_path)
pytorch_wheel_name = complete_wheel("/pytorch/")
print(f"Build Complete. Created {pytorch_wheel_name}..")

View File

@ -12,7 +12,7 @@ import os
import subprocess
import sys
import time
from typing import Optional, Union
from typing import Dict, List, Optional, Tuple, Union
import boto3
@ -24,12 +24,10 @@ os_amis = {
"ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu
"redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user
}
ubuntu18_04_ami = os_amis["ubuntu18_04"]
ubuntu20_04_ami = os_amis["ubuntu20_04"]
def compute_keyfile_path(key_name: Optional[str] = None) -> tuple[str, str]:
def compute_keyfile_path(key_name: Optional[str] = None) -> Tuple[str, str]:
if key_name is None:
key_name = os.getenv("AWS_KEY_NAME")
if key_name is None:
@ -59,7 +57,7 @@ def ec2_instances_by_id(instance_id):
def start_instance(
key_name, ami=ubuntu20_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50
key_name, ami=ubuntu18_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50
):
inst = ec2.create_instances(
ImageId=ami,
@ -98,7 +96,7 @@ class RemoteHost:
self.keyfile_path = keyfile_path
self.login_name = login_name
def _gen_ssh_prefix(self) -> list[str]:
def _gen_ssh_prefix(self) -> List[str]:
return [
"ssh",
"-o",
@ -110,13 +108,13 @@ class RemoteHost:
]
@staticmethod
def _split_cmd(args: Union[str, list[str]]) -> list[str]:
def _split_cmd(args: Union[str, List[str]]) -> List[str]:
return args.split() if isinstance(args, str) else args
def run_ssh_cmd(self, args: Union[str, list[str]]) -> None:
def run_ssh_cmd(self, args: Union[str, List[str]]) -> None:
subprocess.check_call(self._gen_ssh_prefix() + self._split_cmd(args))
def check_ssh_output(self, args: Union[str, list[str]]) -> str:
def check_ssh_output(self, args: Union[str, List[str]]) -> str:
return subprocess.check_output(
self._gen_ssh_prefix() + self._split_cmd(args)
).decode("utf-8")
@ -159,7 +157,7 @@ class RemoteHost:
def using_docker(self) -> bool:
return self.container_id is not None
def run_cmd(self, args: Union[str, list[str]]) -> None:
def run_cmd(self, args: Union[str, List[str]]) -> None:
if not self.using_docker():
return self.run_ssh_cmd(args)
assert self.container_id is not None
@ -180,7 +178,7 @@ class RemoteHost:
if rc != 0:
raise subprocess.CalledProcessError(rc, docker_cmd)
def check_output(self, args: Union[str, list[str]]) -> str:
def check_output(self, args: Union[str, List[str]]) -> str:
if not self.using_docker():
return self.check_ssh_output(args)
assert self.container_id is not None
@ -232,7 +230,7 @@ class RemoteHost:
)
self.download_file(remote_file, local_file)
def list_dir(self, path: str) -> list[str]:
def list_dir(self, path: str) -> List[str]:
return self.check_output(["ls", "-1", path]).split("\n")
@ -360,7 +358,7 @@ def checkout_repo(
branch: str = "main",
url: str,
git_clone_flags: str,
mapping: dict[str, tuple[str, str]],
mapping: Dict[str, Tuple[str, str]],
) -> Optional[str]:
for prefix in mapping:
if not branch.startswith(prefix):
@ -621,11 +619,9 @@ def build_torchaudio(
if host.using_docker():
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
host.run_cmd(
f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
host.run_cmd(f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
&& ./packaging/ffmpeg/build.sh \
&& {build_vars} python3 setup.py bdist_wheel"
)
&& {build_vars} python3 setup.py bdist_wheel")
wheel_name = host.list_dir("audio/dist")[0]
embed_libgomp(host, use_conda, os.path.join("audio", "dist", wheel_name))
@ -683,7 +679,7 @@ def build_domains(
branch: str = "main",
use_conda: bool = True,
git_clone_flags: str = "",
) -> tuple[str, str, str, str]:
) -> Tuple[str, str, str, str]:
vision_wheel_name = build_torchvision(
host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags
)
@ -710,7 +706,7 @@ def start_build(
pytorch_build_number: Optional[str] = None,
shallow_clone: bool = True,
enable_mkldnn: bool = False,
) -> tuple[str, str, str, str, str]:
) -> Tuple[str, str, str, str, str]:
git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else ""
if host.using_docker() and not use_conda:
print("Auto-selecting conda option for docker images")
@ -934,9 +930,9 @@ def parse_arguments():
parser.add_argument("--debug", action="store_true")
parser.add_argument("--build-only", action="store_true")
parser.add_argument("--test-only", type=str)
group = parser.add_mutually_exclusive_group()
group.add_argument("--os", type=str, choices=list(os_amis.keys()))
group.add_argument("--ami", type=str)
parser.add_argument(
"--os", type=str, choices=list(os_amis.keys()), default="ubuntu20_04"
)
parser.add_argument(
"--python-version",
type=str,
@ -966,13 +962,7 @@ def parse_arguments():
if __name__ == "__main__":
args = parse_arguments()
ami = (
args.ami
if args.ami is not None
else os_amis[args.os]
if args.os is not None
else ubuntu20_04_ami
)
ami = os_amis[args.os]
keyfile_path, key_name = compute_keyfile_path(args.key_name)
if args.list_instances:

View File

@ -0,0 +1,5 @@
0.7b
manylinux_2_17
rocm6.2
9be04068c3c0857a4cfd17d7e39e71d0423ebac2
3e9e1959d23b93d78a08fcc5f868125dc3854dece32fd9458be9ef4467982291

View File

@ -86,10 +86,6 @@ CMAKE_VERSION=3.18.5
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
if [[ "$image" == *rocm* ]]; then
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
fi
# It's annoying to rename jobs every time you want to rewrite a
# configuration, so we hardcode everything here rather than do it
@ -109,6 +105,20 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
@ -124,6 +134,36 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
@ -139,10 +179,10 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.1
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9)
CUDA_VERSION=11.8.0
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.13
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
@ -152,10 +192,37 @@ case "$image" in
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9)
CUDA_VERSION=11.8.0
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
@ -210,7 +277,18 @@ case "$image" in
;;
pytorch-linux-focal-rocm-n-1-py3)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.1
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-rocm-n-py3)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
@ -218,25 +296,6 @@ case "$image" in
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-rocm-n-py3)
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
PROTOBUF=yes
DB=yes
VISION=yes
ROCM_VERSION=6.3
NINJA_VERSION=1.9.0
CONDA_CMAKE=yes
TRITON=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-jammy-xpu-2024.0-py3)
ANACONDA_PYTHON_VERSION=3.9
@ -451,7 +510,7 @@ docker build \
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
--build-arg "KATEX=${KATEX:-}" \
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \

View File

@ -113,6 +113,13 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
# Install AOTriton (Early fail)
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH

View File

@ -1 +1 @@
5e4d6b6380d575e48e37e9d987fded4ec588e7bc
6f638937d64e3396793956d75ee3e14802022745

View File

@ -1 +0,0 @@
v2.21.5-1

View File

@ -1 +0,0 @@
v2.25.1-1

View File

@ -1 +1 @@
5d535d7a2d4b435b1b5c1177fd8f04a12b942b9a
ac3470188b914c5d7a5058a7e28b9eb685a62427

View File

@ -1 +1 @@
4b3bb1f8da0ded6ccd572dd1358ef45af5a1befe
35c6c7c6284582b3f41c71c150e11b517acf074a

View File

@ -1,7 +1,7 @@
set -euo pipefail
readonly version=v24.04
readonly src_host=https://github.com/ARM-software
readonly src_host=https://review.mlplatform.org/ml
readonly src_repo=ComputeLibrary
# Clone ACL

View File

@ -0,0 +1,23 @@
#!/bin/bash
# Download a pinned AOTriton release tarball, verify its SHA256 checksum and
# unpack it into the install prefix.
#
# Usage: install_aotriton.sh <install-prefix>
#
# Reads aotriton_version.txt from the current working directory; its
# whitespace-separated fields are, in order:
#   VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256
# Exits non-zero if the prefix is missing or the checksum does not match.
set -ex

source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"

TARBALL='aotriton.tar.gz'
# This read command always returns with exit code 1 (it reaches end-of-file
# before it sees its delimiter), hence the `|| true` under `set -e`.
read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
ARCH=$(uname -m)
# Fail early with a usage message instead of silently unpacking into the
# current directory when no prefix is given.
AOTRITON_INSTALL_PREFIX="${1:?usage: install_aotriton.sh <install-prefix>}"
AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz"

cd "${AOTRITON_INSTALL_PREFIX}"
# Must use -L to follow redirects
curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}"
ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1)
if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then
  echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256},"
  echo " which does not match the expected value ${SHA256}."
  # A bare `exit` returns the status of the last command (the echo above,
  # i.e. 0), which would let the image build continue with a bad tarball.
  exit 1
fi
tar xf "${TARBALL}" && rm -rf "${TARBALL}"

View File

@ -32,12 +32,8 @@ install_ubuntu() {
# HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
# See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
# TODO: Eliminate this hack, we should not rely on apt-get installation
# See https://github.com/pytorch/pytorch/issues/144768
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
maybe_libnccl_dev="libnccl2=2.25.1-1+cuda12.4 libnccl-dev=2.25.1-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
else
maybe_libnccl_dev=""
fi
@ -80,8 +76,7 @@ install_ubuntu() {
vim \
unzip \
gpg-agent \
gdb \
bc
gdb
# Should resolve issues related to various apt package repository cert issues
# see: https://github.com/pytorch/pytorch/issues/65931

View File

@ -9,7 +9,12 @@ install_ubuntu() {
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh`
apt-get install -y cargo
echo "Checking out sccache repo"
git clone https://github.com/mozilla/sccache -b v0.9.1
if [ -n "$CUDA_VERSION" ]; then
# TODO: Remove this
git clone https://github.com/pytorch/sccache
else
git clone https://github.com/mozilla/sccache -b v0.8.2
fi
cd sccache
echo "Building sccache"
cargo build --release
@ -36,33 +41,41 @@ sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
export PATH="/opt/cache/bin:$PATH"
# Setup compiler cache
install_ubuntu
if [ -n "$ROCM_VERSION" ]; then
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
else
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
if [ -n "$CUDA_VERSION" ]; then
# TODO: Install the pre-built binary from S3 as building from source
# https://github.com/pytorch/sccache has started failing mysteriously
# in which sccache server couldn't start with the following error:
# sccache: error: Invalid argument (os error 22)
install_binary
else
install_ubuntu
fi
fi
chmod a+x /opt/cache/bin/sccache
function write_sccache_stub() {
# Unset LD_PRELOAD for ps because of asan + ps issues
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
if [ $1 == "gcc" ]; then
# Do not call sccache recursively when dumping preprocessor argument
# For some reason it's very important for the first cached nvcc invocation
cat >"/opt/cache/bin/$1" <<EOF
# Do not call sccache recursively when dumping preprocessor argument
# For some reason it's very important for the first cached nvcc invocation
cat > "/opt/cache/bin/$1" <<EOF
#!/bin/sh
# sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
for arg in "\$@"; do
if [ "\$arg" = "-E" ]; then
exec $(which $1) "\$@"
fi
done
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
exec $(which $1) "\$@"
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
exec sccache $(which $1) "\$@"
else
exec $(which $1) "\$@"
fi
EOF
else
cat >"/opt/cache/bin/$1" <<EOF
cat > "/opt/cache/bin/$1" <<EOF
#!/bin/sh
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
@ -112,7 +125,7 @@ if [ -n "$ROCM_VERSION" ]; then
TOPDIR=$(dirname $OLDCOMP)
WRAPPED="$TOPDIR/original/$COMPNAME"
mv "$OLDCOMP" "$WRAPPED"
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" >"$OLDCOMP"
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP"
chmod a+x "$OLDCOMP"
}

View File

@ -25,8 +25,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
mkdir -p /opt/conda
chown jenkins:jenkins /opt/conda
SCRIPT_FOLDER="$( cd "$(dirname "$0")" ; pwd -P )"
source "${SCRIPT_FOLDER}/common_utils.sh"
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
pushd /tmp
wget -q "${BASE_URL}/${CONDA_FILE}"
@ -85,9 +84,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
# Magma package names are concatenation of CUDA major and minor ignoring revision
# I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
# Magma is installed from a tarball in the ossci-linux bucket into the conda env
if [ -n "$CUDA_VERSION" ]; then
${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION}) ${ANACONDA_PYTHON_VERSION}
conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
fi
# Install some other packages, including those needed for Python test reporting

View File

@ -70,7 +70,7 @@ function do_cpython_build {
# Install setuptools: since Python 3.12 it is required in order to use distutils
${prefix}/bin/pip install wheel==0.34.2 setuptools==68.2.2
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
ln -sf ${prefix} /opt/python/${abi_tag}
ln -s ${prefix} /opt/python/${abi_tag}
}
function build_cpython {

View File

@ -2,7 +2,7 @@
set -ex
NCCL_VERSION=v2.25.1-1
NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.5.1.17
function install_cusparselt_040 {
@ -16,6 +16,17 @@ function install_cusparselt_040 {
rm -rf tmp_cusparselt
}
function install_cusparselt_052 {
    # Fetch the cuSparseLt 0.5.2.1 x86_64 redistributable into a scratch
    # directory and copy its headers and libraries under /usr/local/cuda.
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    local archive="libcusparse_lt-linux-x86_64-0.5.2.1-archive"

    mkdir tmp_cusparselt && pushd tmp_cusparselt
    wget -q "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${archive}.tar.xz"
    tar xf "${archive}.tar.xz"
    cp -a "${archive}"/include/* /usr/local/cuda/include/
    cp -a "${archive}"/lib/* /usr/local/cuda/lib64/
    popd

    # Drop the scratch directory together with the downloaded archive.
    rm -rf tmp_cusparselt
}
function install_cusparselt_062 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
@ -40,7 +51,6 @@ function install_cusparselt_063 {
function install_118 {
CUDNN_VERSION=9.1.0.70
NCCL_VERSION=v2.21.5-1
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
# install CUDA 11.8.0 in the same container
@ -73,6 +83,39 @@ function install_118 {
ldconfig
}
function install_121 {
    # Install the CUDA 12.1.1 toolkit into /usr/local/cuda-12.1, point
    # /usr/local/cuda at it, then add cuDNN, a source-built NCCL and
    # cuSparseLt 0.5.2 on top. Uses the CUDNN_VERSION and NCCL_VERSION
    # values that are in effect when the function is called.
    local cuda_runfile="cuda_12.1.1_530.30.02_linux.run"
    local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive"

    echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
    rm -rf /usr/local/cuda-12.1 /usr/local/cuda

    # Run the CUDA 12.1.1 toolkit installer in this same container.
    wget -q "https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Build steps follow: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_052

    # Refresh the dynamic linker cache after copying the new libraries.
    ldconfig
}
function install_124 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
@ -171,6 +214,37 @@ function prune_118 {
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
}
# Shrink the CUDA 12.1 install: run nvprune over the static libraries in
# /usr/local/cuda-12.1/lib64 with a fixed -gencode list, then delete the
# visual-tool directories. NVPRUNE, CUDA_LIB_DIR, GENCODE, GENCODE_CUDNN and
# CUDA_BASE are exported, so they remain set in the caller's environment.
function prune_121 {
echo "Pruning CUDA 12.1"
#####################################################################################
# CUDA 12.1 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
# Same list as GENCODE plus sm_61; used for the cuBLAS libraries below.
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
# OVERRIDE_GENCODE, when non-empty, replaces GENCODE only; GENCODE_CUDNN is
# left as defined above.
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS
# The grep chain keeps directory entries containing ".a" (anywhere in the
# name) and drops culibos, cudart, cudnn, cublas and metis; each remaining
# file is pruned in place (input and -o output are the same path).
# $NVPRUNE/$GENCODE/$CUDA_LIB_DIR are expanded by this shell before xargs
# substitutes {} and hands the string to `bash -c`.
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
# NOTE(review): only the two cuBLAS static libraries are pruned here; no cuDNN
# library is touched by this function.
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
# CUDA_BASE ends with "/", so the paths below contain a double slash.
export CUDA_BASE="/usr/local/cuda-12.1/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################
@ -239,52 +313,18 @@ function prune_126 {
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}
function install_128 {
    # Install the CUDA 12.8.0 toolkit into /usr/local/cuda-12.8, point
    # /usr/local/cuda at it, then add cuDNN 9.7.1.26, a source-built NCCL
    # and cuSparseLt 0.6.3 on top.
    # CUDNN_VERSION is deliberately assigned without `local`, so the new
    # value stays visible after the function returns.
    CUDNN_VERSION=9.7.1.26
    local cuda_runfile="cuda_12.8.0_570.86.10_linux.run"
    local cudnn_archive="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive"

    echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
    rm -rf /usr/local/cuda-12.8 /usr/local/cuda

    # Run the CUDA 12.8.0 toolkit installer in this same container.
    wget -q "https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Build steps follow: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_063

    # Refresh the dynamic linker cache after copying the new libraries.
    ldconfig
}
# idiomatic parameter and option handling in sh
while test $# -gt 0
do
case "$1" in
11.8) install_118; prune_118
;;
12.1) install_121; prune_121
;;
12.4) install_124; prune_124
;;
12.6) install_126; prune_126
;;
12.8) install_128;
;;
*) echo "bad argument $1"; exit 1
;;
esac

View File

@ -20,10 +20,10 @@ function install_cusparselt_062 {
function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
@ -57,7 +57,7 @@ function install_124 {
cd ..
rm -rf nccl
install_cusparselt_063
install_cusparselt_062
ldconfig
}
@ -160,40 +160,6 @@ function prune_126 {
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}
function install_128 {
    # Install the CUDA 12.8.0 toolkit from the linux_sbsa installer into
    # /usr/local/cuda-12.8, point /usr/local/cuda at it, then add cuDNN
    # 9.7.1.26 (linux-sbsa build), a source-built NCCL and cuSparseLt 0.6.3.
    # CUDNN_VERSION is deliberately assigned without `local`, so the new
    # value stays visible after the function returns.
    CUDNN_VERSION=9.7.1.26
    local cuda_runfile="cuda_12.8.0_570.86.10_linux_sbsa.run"
    local cudnn_archive="cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive"

    echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
    rm -rf /usr/local/cuda-12.8 /usr/local/cuda

    # Run the CUDA 12.8.0 toolkit installer in this same container.
    wget -q "https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/${cuda_runfile}"
    chmod +x "${cuda_runfile}"
    "./${cuda_runfile}" --toolkit --silent
    rm -f "${cuda_runfile}"
    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/${cudnn_archive}.tar.xz" -O "${cudnn_archive}.tar.xz"
    tar xf "${cudnn_archive}.tar.xz"
    cp -a "${cudnn_archive}"/include/* /usr/local/cuda/include/
    cp -a "${cudnn_archive}"/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Build steps follow: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_063

    # Refresh the dynamic linker cache after copying the new libraries.
    ldconfig
}
# idiomatic parameter and option handling in sh
while test $# -gt 0
do
@ -202,8 +168,6 @@ do
;;
12.6) install_126; prune_126
;;
12.8) install_128;
;;
*) echo "bad argument $1"; exit 1
;;
esac

View File

@ -4,9 +4,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn
pushd tmp_cudnn
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.7.1.26_cuda12-archive"
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"

View File

@ -5,15 +5,7 @@ set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
@ -21,11 +13,17 @@ elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
else
echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}"
fi
tar xf ${CUSPARSELT_NAME}.tar.xz

View File

@ -37,17 +37,14 @@ install_conda_dependencies() {
install_pip_dependencies() {
pushd executorch
as_jenkins bash install_executorch.sh
# A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
# numba and scipy version used in PyTorch CI
conda_run pip uninstall -y numba scipy
as_jenkins bash install_requirements.sh --pybind xnnpack
popd
}
setup_executorch() {
pushd executorch
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
as_jenkins bash .ci/scripts/setup-vulkan-linux-deps.sh
export PYTHON_EXECUTABLE=python
export EXECUTORCH_BUILD_PYBIND=ON

View File

@ -7,20 +7,14 @@ source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
# Install HuggingFace transformers at the commit pinned for CI, plus the
# pandas release pinned alongside it.
# Uses the get_pinned_commit and pip_install helpers (presumably provided by
# the sourced common_utils.sh -- confirm against that file).
function install_huggingface() {
  # Declare the variable that is actually assigned below, so the pinned commit
  # stays local to this function instead of leaking into the script's global
  # scope. The declaration is kept separate from the assignment so that a
  # failing get_pinned_commit is not masked by the exit status of `local`.
  local commit
  commit=$(get_pinned_commit huggingface)
  pip_install pandas==2.0.3
  pip_install "git+https://github.com/huggingface/transformers@${commit}"
}
function install_timm() {
local commit
commit=$(get_pinned_commit timm)
# TODO (huydhn): There is no torchvision release on 3.13 when I write this, so
# I'm using nightly here instead. We just need to package to be able to install
# TIMM. Removing this once vision has a release on 3.13
if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then
pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
fi
pip_install pandas==2.0.3
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
# Clean up
conda_run pip uninstall -y cmake torch torchvision triton

View File

@ -3,6 +3,8 @@
set -eou pipefail
MAGMA_VERSION="2.5.2"
function do_install() {
cuda_version=$1
cuda_version_nodot=${1/./}
@ -15,7 +17,7 @@ function do_install() {
set -x
tmp_dir=$(mktemp -d)
pushd ${tmp_dir}
curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
curl -OLs https://anaconda.org/pytorch/magma-cuda${cuda_version_nodot}/${MAGMA_VERSION}/download/linux-64/${magma_archive}
tar -xvf "${magma_archive}"
mkdir -p "${cuda_dir}/magma"
mv include "${cuda_dir}/magma/include"

View File

@ -1,26 +0,0 @@
#!/usr/bin/env bash
# Script that replaces the magma install from a conda package
set -eou pipefail
# Download a prebuilt MAGMA archive and copy its headers and libraries into a
# conda environment.
#   $1 - CUDA version with a dot (e.g. "12.4"); the first "." is stripped to
#        form the archive name
#   $2 - Python version used to locate the env at /opt/conda/envs/py_<version>
# None of the variables below are declared `local`, so they remain set in the
# calling shell after the function returns.
function do_install() {
# ${1/./} removes only the first "." from the CUDA version (12.4 -> 124).
cuda_version_nodot=${1/./}
anaconda_python_version=$2
MAGMA_VERSION="2.6.1"
magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
anaconda_dir="/opt/conda/envs/py_${anaconda_python_version}"
# Run in a subshell so that command tracing (set -x) and the directory change
# do not carry over into the caller.
(
set -x
tmp_dir=$(mktemp -d)
pushd ${tmp_dir}
# NOTE(review): curl is invoked without --fail, so an HTTP error would most
# likely only surface when tar rejects the downloaded file -- confirm.
curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
tar -xvf "${magma_archive}"
# Only the archive's include/ and lib/ contents are installed into the env.
mv include/* "${anaconda_dir}/include/"
mv lib/* "${anaconda_dir}/lib"
popd
# The temporary directory is not removed here.
)
}
do_install $1 $2

View File

@ -31,15 +31,15 @@ pip_install \
pip_install coloredlogs packaging
pip_install onnxruntime==1.18.1
pip_install onnx==1.17.0
pip_install onnxscript==0.1.0 --no-deps
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20241124 --no-deps
# required by onnxscript
pip_install ml_dtypes
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
# Need a PyTorch version for transformers to work
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu

View File

@ -62,22 +62,6 @@ install_ubuntu() {
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
done
# ROCm 6.3 had a regression where initializing static code objects had significant overhead
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
# clr build needs CppHeaderParser but can only find it using conda's python
/opt/conda/bin/python -m pip install CppHeaderParser
git clone https://github.com/ROCm/HIP -b rocm-6.3.x
HIP_COMMON_DIR=$(readlink -f HIP)
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
mkdir -p clr/build
pushd clr/build
cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
make -j
cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
popd
rm -rf HIP clr
fi
# Cleanup
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

View File

@ -8,12 +8,6 @@ else
with_cuda=no
fi
if [[ -d "/opt/rocm" ]]; then
with_rocm=/opt/rocm
else
with_rocm=no
fi
function install_ucx() {
set -ex
git clone --recursive https://github.com/openucx/ucx.git
@ -25,7 +19,6 @@ function install_ucx() {
./configure --prefix=$UCX_HOME \
--enable-mt \
--with-cuda=$with_cuda \
--with-rocm=$with_rocm \
--enable-profiling \
--enable-stats
time make -j
@ -43,29 +36,12 @@ function install_ucc() {
git submodule update --init --recursive
./autogen.sh
# We only run distributed tests on Tesla M60 and A10G
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
if [[ -n "$ROCM_VERSION" ]]; then
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
else
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
fi
for arch in $amdgpu_targets; do
HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
done
else
HIP_OFFLOAD="all-arch-no-native"
fi
./configure --prefix=$UCC_HOME \
--with-ucx=$UCX_HOME \
--with-cuda=$with_cuda \
--with-nvcc-gencode="${NVCC_GENCODE}" \
--with-rocm=$with_rocm \
--with-rocm-arch="${HIP_OFFLOAD}"
--with-nvcc-gencode="${NVCC_GENCODE}"
time make -j
sudo make install

View File

@ -56,6 +56,11 @@ RUN bash ./install_cuda.sh 11.8
RUN bash ./install_magma.sh 11.8
RUN ln -sf /usr/local/cuda-11.8 /usr/local/cuda
FROM cuda as cuda12.1
RUN bash ./install_cuda.sh 12.1
RUN bash ./install_magma.sh 12.1
RUN ln -sf /usr/local/cuda-12.1 /usr/local/cuda
FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
RUN bash ./install_magma.sh 12.4
@ -66,11 +71,6 @@ RUN bash ./install_cuda.sh 12.6
RUN bash ./install_magma.sh 12.6
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
FROM cuda as cuda12.8
RUN bash ./install_cuda.sh 12.8
RUN bash ./install_magma.sh 12.8
RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda
FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
@ -92,6 +92,13 @@ RUN apt-get update -y && \
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
# Install AOTriton
COPY ./common/common_utils.sh common_utils.sh
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
FROM ${BASE_TARGET} as final
COPY --from=openssl /opt/openssl /opt/openssl
# Install patchelf

View File

@ -39,7 +39,17 @@ case ${GPU_ARCH_TYPE} in
BASE_TARGET=rocm
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100;gfx1101;gfx942"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
PYTORCH_ROCM_ARCH+=";gfx942"
fi
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
;;
*)

View File

@ -25,8 +25,7 @@ ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_magma_conda.sh install_magma_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install cuda and cudnn
ARG CUDA_VERSION

View File

@ -198,3 +198,10 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
# Install AOTriton
COPY ./common/common_utils.sh common_utils.sh
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton

View File

@ -97,7 +97,14 @@ case ${GPU_ARCH_TYPE} in
DEVTOOLSET_VERSION="11"
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
fi
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
else
echo "ERROR: rocm regex failed"
exit 1
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
;;
xpu)

View File

@ -30,10 +30,10 @@ dill==0.3.7
#Pinned versions: 0.3.7
#test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
expecttest==0.3.0
expecttest==0.2.1
#Description: method for writing tests where test framework auto populates
# the expected output based on previous runs
#Pinned versions: 0.3.0
#Pinned versions: 0.2.1
#test that import:
fbscribelogger==0.1.7
@ -90,7 +90,7 @@ librosa>=0.6.2 ; python_version < "3.11"
#Pinned versions:
#test that import:
mypy==1.13.0
mypy==1.11.2
# Pin MyPy version because new errors are likely to appear with each release
#Description: linter
#Pinned versions: 1.10.0
@ -132,9 +132,6 @@ numpy==1.22.4; python_version == "3.9" or python_version == "3.10"
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
numpy==2.1.2; python_version >= "3.13"
pandas==2.0.3; python_version < "3.13"
pandas==2.2.3; python_version >= "3.13"
#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
#Pinned versions: 1.9.0
@ -158,7 +155,7 @@ optree==0.13.0
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
#test_fake_tensor.py, test_mps.py
pillow==11.0.0
pillow==10.3.0
#Description: Python Imaging Library fork
#Pinned versions: 10.3.0
#test that import:
@ -193,11 +190,6 @@ pytest-rerunfailures>=10.3
#Pinned versions:
#test that import:
pytest-subtests==0.13.1
#Description: plugin for subtest support
#Pinned versions:
#test that import:
#pytest-benchmark
#Description: fixture for benchmarking code
#Pinned versions: 3.2.3
@ -245,7 +237,7 @@ scikit-image==0.22.0 ; python_version >= "3.10"
#test that import:
scipy==1.10.1 ; python_version <= "3.11"
scipy==1.14.1 ; python_version >= "3.12"
scipy==1.12.0 ; python_version == "3.12"
# Pin SciPy because of failing distribution tests (see #60347)
#Description: scientific python
#Pinned versions: 1.10.1
@ -280,9 +272,9 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
#test that import:
#lintrunner is supported on aarch64-linux only from 0.12.4 version
lintrunner==0.12.7
lintrunner==0.12.5
#Description: all about linters!
#Pinned versions: 0.12.7
#Pinned versions: 0.12.5
#test that import:
redis>=4.0.0
@ -294,7 +286,7 @@ ghstack==0.8.0
#Pinned versions: 0.8.0
#test that import:
jinja2==3.1.5
jinja2==3.1.4
#Description: jinja2 template engine
#Pinned versions: 3.1.4
#test that import:
@ -304,32 +296,31 @@ pytest-cpp==2.3.0
#Pinned versions: 2.3.0
#test that import:
z3-solver==4.12.6.0
z3-solver==4.12.2.0
#Description: The Z3 Theorem Prover Project
#Pinned versions:
#test that import:
tensorboard==2.13.0 ; python_version < "3.13"
tensorboard==2.18.0 ; python_version >= "3.13"
tensorboard==2.13.0
#Description: Also included in .ci/docker/requirements-docs.txt
#Pinned versions:
#test that import: test_tensorboard
pywavelets==1.4.1 ; python_version < "3.12"
pywavelets==1.7.0 ; python_version >= "3.12"
pywavelets==1.5.0 ; python_version >= "3.12"
#Description: This is a requirement of scikit-image, we need to pin
# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
#Pinned versions: 1.4.1
#test that import:
lxml==5.3.0
lxml==5.0.0
#Description: This is a requirement of unittest-xml-reporting
# Python-3.9 binaries
PyGithub==2.3.0
sympy==1.13.3
sympy==1.13.1 ; python_version >= "3.9"
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
#Pinned versions:
#test that import:
@ -339,7 +330,7 @@ onnx==1.17.0
#Pinned versions:
#test that import:
onnxscript==0.1.0
onnxscript==0.1.0.dev20240817
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@ -362,7 +353,6 @@ pwlf==2.2.1 ; python_version >= "3.8"
# To build PyTorch itself
astunparse
PyYAML
pyzstd
setuptools
ninja==1.11.1 ; platform_machine == "aarch64"
@ -372,8 +362,3 @@ pulp==2.9.0 ; python_version >= "3.8"
#Description: required for testing ilp formulation under torch/distributed/_tools
#Pinned versions: 2.9.0
#test that import: test_sac_ilp.py
dataclasses_json==0.6.7
#Description: required for data pipeline and scripts under tools/stats
#Pinned versions: 0.6.7
#test that import:

View File

@ -14,8 +14,7 @@ matplotlib==3.5.3
#Description: This is used to generate PyTorch docs
#Pinned versions: 3.5.3
tensorboard==2.13.0 ; python_version < "3.13"
tensorboard==2.18.0 ; python_version >= "3.13"
tensorboard==2.13.0
#Description: This is used to generate PyTorch docs
#Pinned versions: 2.13.0

View File

@ -30,8 +30,7 @@ ARG CONDA_CMAKE
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_magma_conda.sh install_magma_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
# Install gcc
ARG GCC_VERSION
@ -81,8 +80,6 @@ RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl
ARG INDUCTOR_BENCHMARKS
ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt

View File

@ -14,20 +14,21 @@ ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh
# Install clang
ARG LLVMDEV
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh
# Install katex
ARG KATEX
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
# Install conda and other packages (e.g., numpy, pytest)
ARG ANACONDA_PYTHON_VERSION
ARG CONDA_CMAKE
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
ARG CONDA_CMAKE
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
@ -38,11 +39,6 @@ ARG GCC_VERSION
COPY ./common/install_gcc.sh install_gcc.sh
RUN bash ./install_gcc.sh && rm install_gcc.sh
# Install clang
ARG CLANG_VERSION
COPY ./common/install_clang.sh install_clang.sh
RUN bash ./install_clang.sh && rm install_clang.sh
# (optional) Install protobuf for ONNX
ARG PROTOBUF
COPY ./common/install_protobuf.sh install_protobuf.sh
@ -89,32 +85,6 @@ COPY ./common/install_amdsmi.sh install_amdsmi.sh
RUN bash ./install_amdsmi.sh
RUN rm install_amdsmi.sh
# (optional) Install UCC
ARG UCX_COMMIT
ARG UCC_COMMIT
ENV UCX_COMMIT $UCX_COMMIT
ENV UCC_COMMIT $UCC_COMMIT
ENV UCX_HOME /usr
ENV UCC_HOME /usr
ADD ./common/install_ucc.sh install_ucc.sh
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
RUN rm install_ucc.sh
COPY ./common/install_openssl.sh install_openssl.sh
ENV OPENSSL_ROOT_DIR /opt/openssl
RUN bash ./install_openssl.sh
ENV OPENSSL_DIR /opt/openssl
ARG INDUCTOR_BENCHMARKS
ARG ANACONDA_PYTHON_VERSION
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ci_commit_pins/huggingface.txt huggingface.txt
COPY ci_commit_pins/timm.txt timm.txt
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
# (optional) Install non-default CMake version
ARG CMAKE_VERSION
COPY ./common/install_cmake.sh install_cmake.sh
@ -137,17 +107,18 @@ COPY triton_version.txt triton_version.txt
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
# Install AOTriton
COPY ./aotriton_version.txt aotriton_version.txt
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_aotriton.sh install_aotriton.sh
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
# Install ccache/sccache (do this last, so we get priority in PATH)
COPY ./common/install_cache.sh install_cache.sh
ENV PATH /opt/cache/bin:$PATH
RUN bash ./install_cache.sh && rm install_cache.sh
# Install Open MPI for ROCm
COPY ./common/install_openmpi.sh install_openmpi.sh
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
RUN rm install_openmpi.sh
# Include BUILD_ENVIRONMENT environment variable in image
ARG BUILD_ENVIRONMENT
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}

View File

@ -36,8 +36,7 @@ ENV DOCS=$DOCS
COPY requirements-ci.txt requirements-docs.txt /opt/conda/
COPY ./common/install_conda.sh install_conda.sh
COPY ./common/common_utils.sh common_utils.sh
COPY ./common/install_magma_conda.sh install_magma_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi
# Install gcc

View File

@ -16,9 +16,9 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
magma/build_magma.sh
.PHONY: all
all: magma-cuda128
all: magma-cuda126
all: magma-cuda124
all: magma-cuda121
all: magma-cuda118
.PHONY:
@ -26,12 +26,6 @@ clean:
$(RM) -r magma-*
$(RM) -r output
.PHONY: magma-cuda128
magma-cuda128: DESIRED_CUDA := 12.8
magma-cuda128: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120
magma-cuda128:
$(DOCKER_RUN)
.PHONY: magma-cuda126
magma-cuda126: DESIRED_CUDA := 12.6
magma-cuda126:
@ -42,6 +36,11 @@ magma-cuda124: DESIRED_CUDA := 12.4
magma-cuda124:
$(DOCKER_RUN)
.PHONY: magma-cuda121
magma-cuda121: DESIRED_CUDA := 12.1
magma-cuda121:
$(DOCKER_RUN)
.PHONY: magma-cuda118
magma-cuda118: DESIRED_CUDA := 11.8
magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37

View File

@ -18,14 +18,12 @@ retry () {
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
PLATFORM="manylinux2014_x86_64"
# TODO move this into the Docker images
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
retry yum install -q -y zip openssl
PLATFORM="manylinux_2_28_x86_64"
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
retry dnf install -q -y zip openssl
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
@ -255,11 +253,11 @@ make_wheel_record() {
FPATH=$1
if echo $FPATH | grep RECORD >/dev/null 2>&1; then
# if the RECORD file, then
echo "\"$FPATH\",,"
echo "$FPATH,,"
else
HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
FSIZE=$(ls -nl $FPATH | awk '{print $5}')
echo "\"$FPATH\",sha256=$HASH,$FSIZE"
echo "$FPATH,sha256=$HASH,$FSIZE"
fi
}
@ -379,12 +377,6 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
$PATCHELF_BIN --print-rpath $sofile
done
# Create the Manylinux 2_28 tag; this needs to happen before regenerating the RECORD
if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then
wheel_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/WHEEL/g')
sed -i -e s#linux_x86_64#"${PLATFORM}"# $wheel_file;
fi
# regenerate the RECORD file with new hashes
record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
if [[ -e $record_file ]]; then
@ -424,20 +416,12 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
popd
fi
# Rename wheel for Manylinux 2_28
if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then
pkg_name=$(echo $(basename $pkg) | sed -e s#linux_x86_64#"${PLATFORM}"#)
zip -rq $pkg_name $PREIX*
rm -f $pkg
mv $pkg_name $(dirname $pkg)/$pkg_name
else
# zip up the wheel back
zip -rq $(basename $pkg) $PREIX*
# remove original wheel
rm -f $pkg
mv $(basename $pkg) $pkg
fi
# zip up the wheel back
zip -rq $(basename $pkg) $PREIX*
# replace original wheel
rm -f $pkg
mv $(basename $pkg) $pkg
cd ..
rm -rf tmp
done
@ -490,9 +474,9 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
echo "$(date) :: Running tests"
pushd "$PYTORCH_ROOT"
#TODO: run_tests.sh and check_binary.sh should be moved to pytorch/pytorch project
LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
"${PYTORCH_ROOT}/.ci/pytorch/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
"/builder/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
popd
echo "$(date) :: Finished tests"
fi

View File

@ -14,7 +14,6 @@ export USE_CUDA_STATIC_LINK=1
export INSTALL_TEST=0 # dont install test binaries into site-packages
export USE_CUPTI_SO=0
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
export USE_CUFILE=${USE_CUFILE:-1}
# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
@ -44,6 +43,13 @@ if [[ -n "$DESIRED_CUDA" ]]; then
fi
fi
echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
# There really has to be a better way to do this - eli
# Possibly limiting builds to specific cuda versions by delimiting images would be a choice
if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
echo "Switching to CUDA version ${DESIRED_CUDA}"
/builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
fi
else
CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
echo "CUDA $CUDA_VERSION Detected"
@ -53,15 +59,23 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
case ${CUDA_VERSION} in
12.8)
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0;10.0;12.0+PTX" #Ripping out 5.0 and 6.0 due to ld error
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
12.6)
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
TORCH_CUDA_ARCH_LIST="9.0"
else
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
fi
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
12.4)
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
TORCH_CUDA_ARCH_LIST="9.0"
else
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
fi
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
12.1)
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
;;
@ -119,16 +133,7 @@ if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then
)
fi
# Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are
# not available in PYPI
if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then
export USE_CUFILE=0
fi
# CUDA_VERSION 12.4, 12.6, 12.8
if [[ $CUDA_VERSION == 12* ]]; then
if [[ $CUDA_VERSION == "12.4" || $CUDA_VERSION == "12.6" ]]; then
export USE_STATIC_CUDNN=0
# Try parallelizing nvcc as well
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
@ -169,16 +174,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
"libnvrtc.so.12"
"libnvrtc-builtins.so"
)
if [[ $USE_CUFILE == 1 ]]; then
DEPS_LIST+=(
"/usr/local/cuda/lib64/libcufile.so.0"
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
)
DEPS_SONAME+=(
"libcufile.so.0"
"libcufile_rdma.so.1"
)
fi
else
echo "Using nvidia libs from pypi."
CUDA_RPATHS=(
@ -195,11 +190,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
'$ORIGIN/../../nvidia/nccl/lib'
'$ORIGIN/../../nvidia/nvtx/lib'
)
if [[ $USE_CUFILE == 1 ]]; then
CUDA_RPATHS+=(
'$ORIGIN/../../nvidia/cufile/lib'
)
fi
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
@ -285,7 +275,7 @@ else
exit 1
fi
# run_tests.sh requires DESIRED_CUDA to know what tests to exclude
# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
export DESIRED_CUDA="$cuda_version_nodot"
# Switch `/usr/local/cuda` to the desired CUDA version

View File

@ -225,11 +225,11 @@ make_wheel_record() {
FPATH=$1
if echo $FPATH | grep RECORD >/dev/null 2>&1; then
# if the RECORD file, then
echo "\"$FPATH\",,"
echo "$FPATH,,"
else
HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
FSIZE=$(ls -nl $FPATH | awk '{print $5}')
echo "\"$FPATH\",sha256=$HASH,$FSIZE"
echo "$FPATH,sha256=$HASH,$FSIZE"
fi
}

View File

@ -107,29 +107,17 @@ if [[ $ROCM_INT -ge 60200 ]]; then
fi
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
LIBELF_PATH="/usr/lib64/libelf.so.1"
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
else
LIBTINFO_PATH="/usr/lib64/libtinfo.so.6"
fi
LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
if [[ $ROCM_INT -ge 60100 ]]; then
# Below libs are direct dependencies of libhipsolver
LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
# Below libs are direct dependencies of libsatlas
LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
else
LIBCHOLMOD_PATH="/lib64/libcholmod.so.3"
# Below libs are direct dependencies of libsatlas
LIBGFORTRAN_PATH="/lib64/libgfortran.so.5"
fi
LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
# Below libs are direct dependencies of libcholmod
LIBAMD_PATH="/lib64/libamd.so.2"
LIBCAMD_PATH="/lib64/libcamd.so.2"
@ -137,6 +125,7 @@ if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
LIBCOLAMD_PATH="/lib64/libcolamd.so.2"
LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3"
# Below libs are direct dependencies of libsatlas
LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
LIBQUADMATH_PATH="/lib64/libquadmath.so.0"
fi
MAYBE_LIB64=lib64
@ -151,7 +140,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
fi
LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
if [[ $ROCM_INT -ge 60100 ]]; then
# Below libs are direct dependencies of libhipsolver
LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
# Below libs are direct dependencies of libcholmod
@ -186,6 +175,12 @@ do
OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
done
# PyTorch-version specific
# AOTriton dependency only for PyTorch >= 2.4
if (( $(echo "${PYTORCH_VERSION} 2.4" | awk '{print ($1 >= $2)}') )); then
ROCM_SO_FILES+=("libaotriton_v2.so")
fi
# rocBLAS library files
ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
ROCBLAS_LIB_DST=lib/rocblas/library

View File

@ -1,6 +1,6 @@
#!/bin/bash
set -ex -o pipefail
set -ex
# Required environment variable: $BUILD_ENVIRONMENT
# (This is set by default in the Docker images we build, so you don't
@ -87,7 +87,7 @@ else
# Workaround required for MKL library linkage
# https://github.com/pytorch/pytorch/issues/119557
if [[ "$ANACONDA_PYTHON_VERSION" = "3.12" || "$ANACONDA_PYTHON_VERSION" = "3.13" ]]; then
if [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
export CMAKE_LIBRARY_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/"
export CMAKE_INCLUDE_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/include/"
fi
@ -191,7 +191,7 @@ fi
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
# memory to build and will OOM
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ "$TORCH_CUDA_ARCH_LIST" == *"8.6"* || "$TORCH_CUDA_ARCH_LIST" == *"8.0"* ]]; then
echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
@ -228,9 +228,9 @@ if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
export CMAKE_BUILD_TYPE=RelWithAssert
fi
# Do not change workspace permissions for ROCm and s390x CI jobs
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
@ -247,9 +247,10 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
fi
if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
set -e -o pipefail
set -e
get_bazel
install_sccache_nvcc_for_bazel
# Leave 1 CPU free and use only up to 80% of memory to reduce the chance of crashing
# the runner
@ -278,7 +279,7 @@ else
"$BUILD_ENVIRONMENT" != *xla* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
python -mpip install numpy==2.0.2
python -mpip install --pre numpy==2.0.2
fi
WERROR=1 python setup.py clean
@ -395,7 +396,7 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
python tools/stats/export_test_times.py
fi
# don't do this for bazel or s390x as they don't use sccache
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
if [[ "$BUILD_ENVIRONMENT" != *s390x* ]]; then
print_sccache_stats
fi

View File

@ -1,394 +0,0 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2006,SC2207,SC2076,SC2155,SC2046,SC1091,SC2143
# TODO: Re-enable shellchecks above
set -eux -o pipefail
# This script checks the following things on binaries
# 1. The gcc abi matches DESIRED_DEVTOOLSET
# 2. MacOS binaries do not link against OpenBLAS
# 3. There are no protobuf symbols of any sort anywhere (turned off, because
# this is currently not true)
# 4. Standard Python imports work
# 5. MKL is available everywhere except for MacOS wheels
# 6. XNNPACK is available everywhere except for MacOS wheels
# 7. CUDA is setup correctly and does not hang
# 8. Magma is available for CUDA builds
# 9. CuDNN is available for CUDA builds
#
# This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA,
# DESIRED_DEVTOOLSET and PACKAGE_TYPE
#
# This script expects PyTorch to be installed into the active Python (the
# Python returned by `which python`). Or, if this is testing a libtorch
# Pythonless binary, then it expects to be in the root folder of the unzipped
# libtorch package.
if [[ -z ${DESIRED_PYTHON:-} ]]; then
export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-}
fi
if [[ -z ${DESIRED_CUDA:-} ]]; then
export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-}
fi
if [[ -z ${DESIRED_DEVTOOLSET:-} ]]; then
export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-}
fi
if [[ -z ${PACKAGE_TYPE:-} ]]; then
export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-}
fi
# The install root depends on both the package type and the os
# All MacOS packages use conda, even for the wheel packages.
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
# NOTE: Only $PWD works on both CentOS and Ubuntu
export install_root="$PWD"
else
if [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
# For python that is maj.mint keep original version
py_dot="$DESIRED_PYTHON"
elif [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+) ]]; then
# Strip everything but major.minor from DESIRED_PYTHON version
py_dot="${BASH_REMATCH[0]}"
else
echo "Unexpected ${DESIRED_PYTHON} format"
exit 1
fi
export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
fi
###############################################################################
# Setup XPU ENV
###############################################################################
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
set +u
# Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pti/latest/env/vars.sh
fi
###############################################################################
# Check GCC ABI
###############################################################################
# NOTE [ Building libtorch with old vs. new gcc ABI ]
#
# Packages built with one version of ABI could not be linked against by client
# C++ libraries that were compiled using the other version of ABI. Since both
# gcc ABIs are still common in the wild, we need to support both ABIs. Currently:
#
# - All the nightlies built on CentOS 7 + devtoolset7 use the old gcc ABI.
# - All the nightlies built on Ubuntu 16.04 + gcc 5.4 use the new gcc ABI.
echo "Checking that the gcc ABI is what we expect"
if [[ "$(uname)" != 'Darwin' ]]; then
function is_expected() {
  # Print 1 when the detected ABI flag agrees with the ABI this build is
  # supposed to use; print nothing otherwise.
  #   $1 - value found after D_GLIBCXX_USE_CXX11_ABI= (may be empty)
  # Reads the globals DESIRED_DEVTOOLSET and DESIRED_CUDA.

  # Neither a cxx11-abi devtoolset nor a rocm build: the flag has to be
  # absent, 0 or OFF.
  if [[ "$DESIRED_DEVTOOLSET" != *"cxx11-abi"* && "$DESIRED_CUDA" != *"rocm"* ]]; then
    if [[ -z "$1" || "$1" == 0 || "$1" == "OFF" ]]; then
      echo 1
    fi
    return
  fi

  # cxx11-abi devtoolset or rocm build: the flag has to be switched on.
  if [[ "$1" -gt 0 || "$1" == "ON " ]]; then
    echo 1
  fi
}
# First we check that the env var in TorchConfig.cmake is correct
# We search for D_GLIBCXX_USE_CXX11_ABI=1 in torch/TorchConfig.cmake
torch_config="${install_root}/share/cmake/Torch/TorchConfig.cmake"
if [[ ! -f "$torch_config" ]]; then
echo "No TorchConfig.cmake found!"
ls -lah "$install_root/share/cmake/Torch"
exit 1
fi
echo "Checking the TorchConfig.cmake"
cat "$torch_config"
# The sed call below is
# don't print lines by default (only print the line we want)
# -n
# execute the following expression
# e
# replace lines that match with the first capture group and print
# s/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p
# any characters, D_GLIBCXX_USE_CXX11_ABI=, exactly one any character, a
# quote, any characters
# Note the exactly one single character after the '='. In the case that the
# variable is not set the '=' will be followed by a '"' immediately and the
# line will fail the match and nothing will be printed; this is what we
# want. Otherwise it will capture the 0 or 1 after the '='.
# /.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/
# replace the matched line with the capture group and print
# /\1/p
actual_gcc_abi="$(sed -ne 's/.*D_GLIBCXX_USE_CXX11_ABI=\(.\)".*/\1/p' < "$torch_config")"
if [[ "$(is_expected "$actual_gcc_abi")" != 1 ]]; then
echo "gcc ABI $actual_gcc_abi not as expected."
exit 1
fi
# We also check that there are [not] cxx11 symbols in libtorch
#
echo "Checking that symbols in libtorch.so have the right gcc abi"
python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py"
echo "cxx11 symbols seem to be in order"
fi # if on Darwin
###############################################################################
# Check for no OpenBLAS
# TODO Check for no Protobuf symbols (not finished)
# Print *all* runtime dependencies
###############################################################################
# We have to loop through all shared libraries for this
if [[ "$(uname)" == 'Darwin' ]]; then
all_dylibs=($(find "$install_root" -name '*.dylib'))
for dylib in "${all_dylibs[@]}"; do
echo "All dependencies of $dylib are $(otool -L $dylib) with rpath $(otool -l $dylib | grep LC_RPATH -A2)"
# Check that OpenBlas is not linked to on Macs
echo "Checking the OpenBLAS is not linked to"
if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then
echo "ERROR: Found openblas as a dependency of $dylib"
echo "Full dependencies is: $(otool -L $dylib)"
exit 1
fi
# Check for protobuf symbols
#proto_symbols="$(nm $dylib | grep protobuf)" || true
#if [[ -n "$proto_symbols" ]]; then
# echo "ERROR: Detected protobuf symbols in $dylib"
# echo "Symbols are $proto_symbols"
# exit 1
#fi
done
else
all_libs=($(find "$install_root" -name '*.so'))
for lib in "${all_libs[@]}"; do
echo "All dependencies of $lib are $(ldd $lib) with runpath $(objdump -p $lib | grep RUNPATH)"
# Check for protobuf symbols
#proto_symbols=$(nm $lib | grep protobuf) || true
#if [[ -n "$proto_symbols" ]]; then
# echo "ERROR: Detected protobuf symbols in $lib"
# echo "Symbols are $proto_symbols"
# exit 1
#fi
done
fi
setup_link_flags () {
  # Populate the global linker-flag variables used when compiling the example
  # programs against the libraries found under ${install_root}/lib.
  #
  # Sets: REF_LIB, ADDITIONAL_LINKER_FLAGS, C10_LINK_FLAGS,
  #       TORCH_CPU_LINK_FLAGS, TORCH_CUDA_LINK_FLAGS
  # Reads: install_root

  # Embed the library directory as a runtime search path; Darwin's linker
  # spells the option differently.
  REF_LIB="-Wl,-R${install_root}/lib"
  if [[ "$(uname)" == 'Darwin' ]]; then
    REF_LIB="-Wl,-rpath ${install_root}/lib"
  fi

  ADDITIONAL_LINKER_FLAGS=""
  if [[ "$(uname)" == 'Linux' ]]; then
    ADDITIONAL_LINKER_FLAGS="-Wl,--no-as-needed"
  fi

  # Each optional library is only linked when it is actually present.
  C10_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libc10.so" ] || [ -f "${install_root}/lib/libc10.dylib" ]; then
    C10_LINK_FLAGS="-lc10"
  fi

  TORCH_CPU_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libtorch_cpu.so" ] || [ -f "${install_root}/lib/libtorch_cpu.dylib" ]; then
    TORCH_CPU_LINK_FLAGS="-ltorch_cpu"
  fi

  TORCH_CUDA_LINK_FLAGS=""
  if [ -f "${install_root}/lib/libtorch_cuda.so" ] || [ -f "${install_root}/lib/libtorch_cuda.dylib" ]; then
    TORCH_CUDA_LINK_FLAGS="-ltorch_cuda"
  # Split CUDA build: both halves (cuda_cpp and cuda_cu) are linked, so both
  # must exist. The earlier check tested the same file twice on each side.
  elif { [ -f "${install_root}/lib/libtorch_cuda_cpp.so" ] && [ -f "${install_root}/lib/libtorch_cuda_cu.so" ]; } || \
       { [ -f "${install_root}/lib/libtorch_cuda_cpp.dylib" ] && [ -f "${install_root}/lib/libtorch_cuda_cu.dylib" ]; }; then
    TORCH_CUDA_LINK_FLAGS="-ltorch_cuda_cpp -ltorch_cuda_cu"
  fi
}
TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
build_and_run_example_cpp () {
  # Compile ${TEST_CODE_DIR}/$1.cpp against the installed libtorch using the
  # ABI setting that matches DESIRED_DEVTOOLSET, then run the resulting binary.
  #   $1 - example name (source file without the .cpp suffix; also the
  #        name of the produced executable in the current directory)
  case "$DESIRED_DEVTOOLSET" in
    *"cxx11-abi"*) GLIBCXX_USE_CXX11_ABI=1 ;;
    *)             GLIBCXX_USE_CXX11_ABI=0 ;;
  esac

  # Fills in REF_LIB, ADDITIONAL_LINKER_FLAGS and the *_LINK_FLAGS variables.
  setup_link_flags

  # Expansions are left unquoted on purpose: some flag variables hold several
  # words (or none) and rely on word splitting.
  g++ ${TEST_CODE_DIR}/$1.cpp \
    -I${install_root}/include \
    -I${install_root}/include/torch/csrc/api/include \
    -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI \
    -std=gnu++17 \
    -L${install_root}/lib \
    ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} \
    -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS \
    -o $1
  ./$1
}
build_example_cpp_with_incorrect_abi () {
  # Compile ${TEST_CODE_DIR}/$1.cpp with the *opposite* ABI setting from the
  # one DESIRED_DEVTOOLSET calls for, and insist that the build fails.
  #   $1 - example name (source file without the .cpp suffix)
  # Exits the script with status 1 when the mismatched build succeeds.
  case "$DESIRED_DEVTOOLSET" in
    *"cxx11-abi"*) GLIBCXX_USE_CXX11_ABI=0 ;;
    *)             GLIBCXX_USE_CXX11_ABI=1 ;;
  esac

  # The compiler is expected to fail, so errexit is suspended until its exit
  # status has been captured.
  set +e
  setup_link_flags
  g++ ${TEST_CODE_DIR}/$1.cpp \
    -I${install_root}/include \
    -I${install_root}/include/torch/csrc/api/include \
    -D_GLIBCXX_USE_CXX11_ABI=$GLIBCXX_USE_CXX11_ABI \
    -std=gnu++17 \
    -L${install_root}/lib \
    ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} \
    -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS \
    -o $1
  ERRCODE=$?
  set -e

  if [ "$ERRCODE" -ne "0" ]; then
    echo "Building example with incorrect ABI throws expected error. Proceeding."
    return
  fi
  echo "Building example with incorrect ABI didn't throw error. Aborting."
  exit 1
}
###############################################################################
# Check simple Python/C++ calls
###############################################################################
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
# NS: Set LD_LIBRARY_PATH for CUDA builds, but perhaps it should be removed
if [[ "$DESIRED_CUDA" == "cu"* ]]; then
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
fi
build_and_run_example_cpp simple-torch-test
# `_GLIBCXX_USE_CXX11_ABI` is always ignored by gcc in devtoolset7, so we test
# the expected failure case for Ubuntu 16.04 + gcc 5.4 only.
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
build_example_cpp_with_incorrect_abi simple-torch-test
fi
else
pushd /tmp
python -c 'import torch'
popd
fi
###############################################################################
# Check torch.git_version
###############################################################################
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
pushd /tmp
python -c 'import torch; assert torch.version.git_version != "Unknown"'
python -c 'import torch; assert torch.version.git_version != None'
popd
fi
###############################################################################
# Check for MKL
###############################################################################
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
echo "Checking that MKL is available"
build_and_run_example_cpp check-torch-mkl
elif [[ "$(uname -m)" != "arm64" && "$(uname -m)" != "s390x" ]]; then
if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
if [[ "$(uname -m)" == "aarch64" ]]; then
echo "Checking that MKLDNN is available on aarch64"
pushd /tmp
python -c 'import torch; exit(0 if torch.backends.mkldnn.is_available() else 1)'
popd
else
echo "Checking that MKL is available"
pushd /tmp
python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)'
popd
fi
fi
fi
###############################################################################
# Check for XNNPACK
###############################################################################
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
echo "Checking that XNNPACK is available"
build_and_run_example_cpp check-torch-xnnpack
else
if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]] && [[ "$(uname -m)" != "s390x" ]]; then
echo "Checking that XNNPACK is available"
pushd /tmp
python -c 'import torch.backends.xnnpack; exit(0 if torch.backends.xnnpack.enabled else 1)'
popd
fi
fi
###############################################################################
# Check CUDA configured correctly
###############################################################################
# Skip these for Windows machines without GPUs
if [[ "$OSTYPE" == "msys" ]]; then
GPUS=$(wmic path win32_VideoController get name)
if [[ ! "$GPUS" == *NVIDIA* ]]; then
echo "Skip CUDA tests for machines without a Nvidia GPU card"
exit 0
fi
fi
# Test that CUDA builds are setup correctly
if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* && "$(uname -m)" != "s390x" ]]; then
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
build_and_run_example_cpp check-torch-cuda
else
pushd /tmp
echo "Checking that CUDA archs are setup correctly"
timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
# These have to run after CUDA is initialized
echo "Checking that magma is available"
python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
echo "Checking that CuDNN is available"
python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
# Validates that the build is free of the linker regressions reported in https://github.com/pytorch/pytorch/issues/57744
echo "Checking that exception handling works"
python -c "import torch; from unittest import TestCase;TestCase().assertRaises(RuntimeError, lambda:torch.eye(7, 7, device='cuda:7'))"
echo "Checking that basic RNN works"
python ${TEST_CODE_DIR}/rnn_smoke.py
echo "Checking that basic CNN works"
python "${TEST_CODE_DIR}/cnn_smoke.py"
echo "Test that linalg works"
python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))"
popd
fi # if libtorch
fi # if cuda
##########################
# Run parts of smoke tests
##########################
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
pushd "$(dirname ${BASH_SOURCE[0]})/smoke_test"
python -c "from smoke_test import test_linalg; test_linalg()"
if [[ "$DESIRED_CUDA" == *cuda* ]]; then
python -c "from smoke_test import test_linalg; test_linalg('cuda')"
fi
popd
fi
###############################################################################
# Check PyTorch supports TCP_TLS gloo transport
###############################################################################
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
GLOO_CHECK="import torch.distributed as dist
try:
dist.init_process_group('gloo', rank=0, world_size=1)
except RuntimeError as e:
print(e)
"
RESULT=`GLOO_DEVICE_TRANSPORT=TCP_TLS MASTER_ADDR=localhost MASTER_PORT=63945 python -c "$GLOO_CHECK"`
GLOO_TRANSPORT_IS_NOT_SUPPORTED='gloo transport is not supported'
if [[ "$RESULT" =~ "$GLOO_TRANSPORT_IS_NOT_SUPPORTED" ]]; then
echo "PyTorch doesn't support TLS_TCP transport, please build with USE_GLOO_WITH_OPENSSL=1"
exit 1
fi
fi
###############################################################################
# Check for C++ ABI compatibility between gcc7 and gcc9 compiled binaries
###############################################################################
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" == 'manywheel' ]]; then
pushd /tmp
python -c "import torch; exit(0 if torch.compiled_with_cxx11_abi() else (0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1011' else 1))"
popd
fi

View File

@ -3,7 +3,7 @@
# Common setup for all Jenkins scripts
# shellcheck source=./common_utils.sh
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
set -ex -o pipefail
set -ex
# Required environment variables:
# $BUILD_ENVIRONMENT (should be set by your Docker image)

View File

@ -111,6 +111,26 @@ function get_bazel() {
chmod u+x tools/bazel
}
# This function is bazel specific because of the bug
# in the bazel that requires some special paths massaging
# as a workaround. See
# https://github.com/bazelbuild/bazel/issues/10167
function install_sccache_nvcc_for_bazel() {
# Replace /usr/local/cuda/bin/nvcc with a small wrapper script that routes
# compilations through sccache.
# Keep the real compiler reachable under a new name before nvcc is overwritten.
sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real
# Write the `/usr/local/cuda/bin/nvcc`
# The heredoc delimiter is unquoted, so every \$ below is escaped in order to
# land in the wrapper as a literal $ (evaluated when the wrapper runs, not
# while it is being written).
# The wrapper looks up its parent's command name: when that is not sccache it
# re-invokes /usr/local/cuda/bin/nvcc through sccache; when it is, it execs
# nvcc-real. tee also copies the wrapper text to stdout.
# NOTE(review): the nvcc-real path in the wrapper is relative
# (external/local_cuda/...), presumably resolved against bazel's execution
# root - confirm.
cat << EOF | sudo tee /usr/local/cuda/bin/nvcc
#!/bin/sh
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
exec sccache /usr/local/cuda/bin/nvcc "\$@"
else
exec external/local_cuda/cuda/bin/nvcc-real "\$@"
fi
EOF
# Make the freshly written wrapper executable.
sudo chmod +x /usr/local/cuda/bin/nvcc
}
function install_monkeytype {
# Install MonkeyType
pip_install MonkeyType
@ -160,7 +180,7 @@ function install_torchvision() {
}
function install_tlparse() {
pip_install --user "tlparse==0.3.30"
pip_install --user "tlparse==0.3.25"
PATH="$(python -m site --user-base)/bin:$PATH"
}
@ -169,34 +189,24 @@ function install_torchrec_and_fbgemm() {
torchrec_commit=$(get_pinned_commit torchrec)
local fbgemm_commit
fbgemm_commit=$(get_pinned_commit fbgemm)
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
fbgemm_commit=$(get_pinned_commit fbgemm_rocm)
fi
pip_uninstall torchrec-nightly
pip_uninstall fbgemm-gpu-nightly
pip_install setuptools-git-versioning scikit-build pyre-extensions
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
# install torchrec first because it installs fbgemm nightly on top of rocm fbgemm
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
pip_uninstall fbgemm-gpu-nightly
# TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
# seems to be an sccache-related issue
if [[ "$IS_A100_RUNNER" == "1" ]]; then
unset CMAKE_CUDA_COMPILER_LAUNCHER
sudo mv /opt/cache/bin /opt/cache/bin-backup
fi
pip_install tabulate # needed for newer fbgemm
pip_install patchelf # needed for rocm fbgemm
git clone --recursive https://github.com/pytorch/fbgemm
pushd fbgemm/fbgemm_gpu
git checkout "${fbgemm_commit}"
python setup.py install \
--package_variant=rocm \
-DHIP_ROOT_DIR="${ROCM_PATH}" \
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
popd
rm -rf fbgemm
else
# See https://github.com/pytorch/pytorch/issues/106971
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
# See https://github.com/pytorch/pytorch/issues/106971
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
if [[ "$IS_A100_RUNNER" == "1" ]]; then
export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
sudo mv /opt/cache/bin-backup /opt/cache/bin
fi
}
@ -226,11 +236,6 @@ function checkout_install_torchbench() {
# to install and test other models
python install.py --continue_on_fail
fi
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
# is regressing speedup metric. This needs to be investigated further
pip install transformers==4.38.1
echo "Print all dependencies after TorchBench is installed"
python -mpip freeze
popd

View File

@ -40,7 +40,7 @@ echo "Building PyTorch C++ API docs..."
rm -rf cppdocs
git clone https://github.com/pytorch/cppdocs
set -ex -o pipefail
set -ex
# Generate ATen files
pushd "${pt_checkout}"

View File

@ -5,7 +5,7 @@ pt_checkout="/var/lib/jenkins/workspace"
source "$pt_checkout/.ci/pytorch/common_utils.sh"
echo "functorch_doc_push_script.sh: Invoked with $*"
set -ex -o pipefail
set -ex
version=${DOCS_VERSION:-nightly}
echo "version: $version"

View File

@ -6,7 +6,7 @@
# return the same thing, e.g. checks for rocm, CUDA, and changing the path
# where sccache is installed, and not changing /etc/environment.
set -ex -o pipefail
set -ex
install_binary() {
echo "Downloading sccache binary from S3 repo"

View File

@ -18,9 +18,6 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available(
fi
popd
# enable debug asserts in serialization
export TORCH_SERIALIZATION_DEBUG=1
setup_test_python() {
# The CircleCI worker hostname doesn't resolve to an address.
# This environment variable makes ProcessGroupGloo default to
@ -193,19 +190,11 @@ test_torchbench_perf() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
local backend=eager
local dtype=notset
local device=mps
echo "Setup complete, launching torchbench training performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
--performance --backend "$backend" --training --devices "$device" \
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
echo "Launching torchbench inference performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
--performance --backend "$backend" --inference --devices "$device" \
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
echo "Pytorch benchmark on mps device completed"
}
@ -220,27 +209,26 @@ test_torchbench_smoketest() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
local backend=eager
local dtype=notset
local device=mps
touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
touch "$TEST_REPORTS_DIR"/torchbench_training.csv
touch "$TEST_REPORTS_DIR"/torchbench_inference.csv
echo "Setup complete, launching torchbench training performance run"
for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
--performance --only "$model" --backend "$backend" --training --devices "$device" \
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
done
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
echo "Launching torchbench inference performance run"
for model in hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152; do
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
--performance --only "$model" --backend "$backend" --inference --devices "$device" \
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
done
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
echo "Pytorch benchmark on mps device completed"
}
@ -279,6 +267,25 @@ test_timm_perf() {
install_tlparse
if [[ $TEST_CONFIG == *"test_mps"* ]]; then
if [[ $NUM_TEST_SHARDS -gt 1 ]]; then
test_python_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_libtorch
test_custom_script_ops
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
test_jit_hooks
test_custom_backend
fi
else
test_python_all
test_libtorch
test_custom_script_ops
test_jit_hooks
test_custom_backend
fi
fi
if [[ $TEST_CONFIG == *"perf_all"* ]]; then
test_torchbench_perf
test_hf_perf
@ -291,19 +298,4 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then
test_timm_perf
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
test_torchbench_smoketest
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
test_python_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_libtorch
test_custom_script_ops
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
test_jit_hooks
test_custom_backend
fi
else
test_python_all
test_libtorch
test_custom_script_ops
test_jit_hooks
test_custom_backend
fi

View File

@ -8,62 +8,55 @@
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
echo "Testing pytorch"
# When adding more tests, please use HUD to see which shard is shorter
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
# FSDP tests
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
fi
time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
time python test/run_test.py --verbose -i distributed/test_c10d_common
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
# FSDP tests
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
# ShardedTensor tests
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
time python test/run_test.py --verbose -i distributed/test_c10d_common
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
time python test/run_test.py --verbose -i distributed/test_store
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
# functional collective tests
time python test/run_test.py --verbose -i distributed/test_functional_api
# ShardedTensor tests
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
# DTensor tests
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
# functional collective tests
time python test/run_test.py --verbose -i distributed/test_functional_api
# DeviceMesh test
time python test/run_test.py --verbose -i distributed/test_device_mesh
# DTensor tests
time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
# DTensor/TP tests
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
# DeviceMesh test
time python test/run_test.py --verbose -i distributed/test_device_mesh
# FSDP2 tests
time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
# DTensor/TP tests
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
# ND composability tests
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
# FSDP2 tests
time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
# ND composability tests
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
fi
# Other tests
time python test/run_test.py --verbose -i test_cuda_primary_ctx
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
assert_git_not_dirty

View File

@ -7,7 +7,7 @@ source "$pt_checkout/.ci/pytorch/common_utils.sh"
echo "python_doc_push_script.sh: Invoked with $*"
set -ex -o pipefail
set -ex
# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
# the order of operations goes:
@ -63,7 +63,7 @@ build_docs () {
echo "(tried to echo the WARNINGS above the ==== line)"
echo =========================
fi
set -ex -o pipefail
set -ex
return $code
}

View File

@ -1,436 +0,0 @@
#!/bin/bash
# shellcheck disable=SC2086,SC2048,SC2068,SC2145,SC2034,SC2207,SC2143
# TODO: Re-enable shellchecks above
set -eux -o pipefail
# Essentially runs pytorch/test/run_test.py, but keeps track of which tests to
# skip in a centralized place.
#
# TODO Except for a few tests, this entire file is a giant TODO. Why are these
# tests failing?
# TODO deal with Windows
# This script expects to be in the pytorch root folder
# (recognised by the presence of test/run_test.py); exit code 2 reports a wrong cwd.
if [[ ! -d 'test' || ! -f 'test/run_test.py' ]]; then
echo "run_tests.sh expects to be run from the Pytorch root directory " \
"but I'm actually in $(pwd)"
exit 2
fi
# Allow master skip of all tests
# (any non-empty SKIP_ALL_TESTS makes the script succeed without doing anything)
if [[ -n "${SKIP_ALL_TESTS:-}" ]]; then
exit 0
fi
# If given specific test params then just run those
# RUN_TEST_PARAMS is expanded unquoted on purpose so it word-splits into
# separate run_test.py arguments; nothing else in this script runs afterwards.
if [[ -n "${RUN_TEST_PARAMS:-}" ]]; then
echo "$(date) :: Calling user-command $(pwd)/test/run_test.py ${RUN_TEST_PARAMS[@]}"
python test/run_test.py ${RUN_TEST_PARAMS[@]}
exit 0
fi
# Function to retry functions that sometimes timeout or have flaky failures.
# Runs the given command up to five times, sleeping 1, 2, 4 and 8 seconds
# between attempts, and returns the status of the last attempt.
# "$@" (instead of unquoted $*) keeps every argument intact, so arguments that
# contain whitespace or glob characters are passed through unchanged.
retry () {
    "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
}
# Parameters
##############################################################################
# Either exactly three positional arguments are given
# (PACKAGE_TYPE DESIRED_PYTHON DESIRED_CUDA), or all three must come from the
# environment variables of the same names; otherwise print usage and exit 1.
if [[ "$#" != 3 ]]; then
if [[ -z "${DESIRED_PYTHON:-}" || -z "${DESIRED_CUDA:-}" || -z "${PACKAGE_TYPE:-}" ]]; then
echo "USAGE: run_tests.sh PACKAGE_TYPE DESIRED_PYTHON DESIRED_CUDA"
echo "The env variable PACKAGE_TYPE must be set to 'manywheel' or 'libtorch'"
echo "The env variable DESIRED_PYTHON must be set like '2.7mu' or '3.6m' etc"
echo "The env variable DESIRED_CUDA must be set like 'cpu' or 'cu80' etc"
exit 1
fi
package_type="$PACKAGE_TYPE"
py_ver="$DESIRED_PYTHON"
cuda_ver="$DESIRED_CUDA"
else
package_type="$1"
py_ver="$2"
cuda_ver="$3"
fi
# The cxx11-abi CPU flavour is treated exactly like the plain CPU build below.
if [[ "$cuda_ver" == 'cpu-cxx11-abi' ]]; then
cuda_ver="cpu"
fi
# cu80, cu90, cu100, cpu
# Derive "major.minor" from the digits after "cu": 4 characters means a
# one-digit major (cu80 -> 8.0), 5 characters a two-digit major (cu100 -> 10.0).
# For any other length (e.g. "cpu") cuda_ver_majmin is left unset.
if [[ ${#cuda_ver} -eq 4 ]]; then
cuda_ver_majmin="${cuda_ver:2:1}.${cuda_ver:3:1}"
elif [[ ${#cuda_ver} -eq 5 ]]; then
cuda_ver_majmin="${cuda_ver:2:2}.${cuda_ver:4:1}"
fi
# Package pins for the conda install on Darwin: Python 3.10 needs newer
# protobuf/numpy, every other version uses the older protobuf pin and lets
# conda pick numpy.
NUMPY_PACKAGE=""
if [[ ${py_ver} == "3.10" ]]; then
    PROTOBUF_PACKAGE="protobuf>=3.17.2"
    NUMPY_PACKAGE="numpy>=1.21.2"
else
    PROTOBUF_PACKAGE="protobuf=3.14.0"
fi
# Environment initialization
if [[ "$(uname)" == Darwin ]]; then
    # Install the testing dependencies
    retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml
else
    # pip installs are best-effort: a failure here must not abort the script.
    retry pip install -qr requirements.txt || true
    retry pip install -q hypothesis protobuf pytest setuptools || true
    numpy_ver=1.15
    # Only Python 2.x, 3.5 and 3.6 get the older numpy. Match on the
    # "Python X.Y." prefix: a bare *2* glob would also match versions that
    # merely contain a "2", such as "Python 3.12.1" or "Python 3.9.2".
    case "$(python --version 2>&1)" in
        "Python 2."* | "Python 3.5."* | "Python 3.6."*)
            numpy_ver=1.11
            ;;
    esac
    retry pip install -q "numpy==${numpy_ver}" || true
fi
# Log the resulting environment for debugging.
echo "Testing with:"
pip freeze
conda list || true
##############################################################################
# Smoke tests
##############################################################################
# TODO use check_binary.sh, which requires making sure it runs on Windows
# NOTE(review): presumably we cd to / so `import torch` picks up the installed
# package rather than the source checkout - confirm.
pushd /
echo "Smoke testing imports"
python -c 'import torch'
# Test that MKL is there
# (skipped for wheel packages on Darwin)
if [[ "$(uname)" == 'Darwin' && "$package_type" == *wheel ]]; then
echo 'Not checking for MKL on Darwin wheel packages'
else
echo "Checking that MKL is available"
python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)'
fi
# On Windows (msys) stop successfully when wmic reports no NVIDIA adapter.
if [[ "$OSTYPE" == "msys" ]]; then
GPUS=$(wmic path win32_VideoController get name)
if [[ ! "$GPUS" == *NVIDIA* ]]; then
echo "Skip CUDA tests for machines without a Nvidia GPU card"
exit 0
fi
fi
# Test that the version number is consistent during building and testing
# NOTE(review): PYTORCH_BUILD_NUMBER and PYTORCH_BUILD_VERSION are read without
# defaults; under the `set -u` enabled at the top of this script an unset
# variable aborts the run here.
if [[ "$PYTORCH_BUILD_NUMBER" -gt 1 ]]; then
expected_version="${PYTORCH_BUILD_VERSION}.post${PYTORCH_BUILD_NUMBER}"
else
expected_version="${PYTORCH_BUILD_VERSION}"
fi
echo "Checking that we are testing the package that is just built"
python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else 1)"
# Test that CUDA builds are setup correctly
if [[ "$cuda_ver" != 'cpu' ]]; then
# A failing nvidia-smi is taken to mean "no GPU on this machine", not an error.
cuda_installed=1
nvidia-smi || cuda_installed=0
if [[ "$cuda_installed" == 0 ]]; then
echo "Skip CUDA tests for machines without a Nvidia GPU card"
else
# Test CUDA archs
echo "Checking that CUDA archs are setup correctly"
# Bounded by `timeout 20` so a hanging CUDA call fails the check instead of stalling.
timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
# These have to run after CUDA is initialized
echo "Checking that magma is available"
python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
echo "Checking that CuDNN is available"
python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
fi
fi
# Check that OpenBlas is not linked to on MacOS
if [[ "$(uname)" == 'Darwin' ]]; then
echo "Checking the OpenBLAS is not linked to"
# Inspect every .dylib under the torch directory of the first site-packages path.
all_dylibs=($(find "$(python -c "import site; print(site.getsitepackages()[0])")"/torch -name '*.dylib'))
for dylib in "${all_dylibs[@]}"; do
if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then
echo "Found openblas as a dependency of $dylib"
echo "Full dependencies is: $(otool -L $dylib)"
exit 1
fi
done
echo "Checking that OpenMP is available"
python -c "import torch; exit(0 if torch.backends.openmp.is_available() else 1)"
fi
popd
# TODO re-enable the other tests after the nightlies are moved to CI. This is
# because the binaries keep breaking, often from additional tests, that aren't
# real problems. Once these are on circleci and a smoke-binary-build is added
# to PRs then this should stop happening and these can be re-enabled.
# The unconditional exit below makes everything after it in this file unreachable.
echo "Not running unit tests. Hopefully these problems are caught by CI"
exit 0
##############################################################################
# Running unit tests (except not right now)
##############################################################################
echo "$(date) :: Starting tests for $package_type package for python$py_ver and $cuda_ver"
# We keep track of exact tests to skip, as otherwise we would be hardly running
# any tests. But b/c of issues working with pytest/normal-python-test/ and b/c
# of special snowflake tests in test/run_test.py we also take special care of
# those
tests_to_skip=()
#
# Entire file exclusions
##############################################################################
entire_file_exclusions=("-x")
# cpp_extensions doesn't work with pytest, so we exclude it from the pytest run
# here and then manually run it later. Note that this is only because this
# entire_file_exclusions flag is only passed to the pytest run
entire_file_exclusions+=("cpp_extensions")
# TODO temporary line to fix next days nightlies, but should be removed when
# issue is fixed
entire_file_exclusions+=('type_info')
if [[ "$cuda_ver" == 'cpu' ]]; then
# test/test_cuda.py exits early if the installed torch is not built with
# CUDA, but the exit doesn't work when running with pytest, so pytest will
# still try to run all the CUDA tests and then fail
entire_file_exclusions+=("cuda")
entire_file_exclusions+=("nccl")
fi
if [[ "$(uname)" == 'Darwin' || "$OSTYPE" == "msys" ]]; then
# pytest on Mac doesn't like the exits in these files
entire_file_exclusions+=('c10d')
entire_file_exclusions+=('distributed')
# pytest doesn't mind the exit but fails the tests. On Mac we run this
# later without pytest
entire_file_exclusions+=('thd_distributed')
fi
#
# Universal flaky tests
##############################################################################
# RendezvousEnvTest sometimes hangs forever
# Otherwise it will fail on CUDA with
# Traceback (most recent call last):
# File "test_c10d.py", line 179, in test_common_errors
# next(gen)
# AssertionError: ValueError not raised
tests_to_skip+=('RendezvousEnvTest and test_common_errors')
# This hung forever once on conda_3.5_cu92
tests_to_skip+=('TestTorch and test_sum_dim')
# test_trace_warn isn't actually flaky, but it doesn't work with pytest so we
# just skip it
tests_to_skip+=('TestJit and test_trace_warn')
#
# Python specific flaky tests
##############################################################################
# test_dataloader.py:721: AssertionError
# looks like a timeout, but interestingly only appears on python 3
if [[ "$py_ver" == 3* ]]; then
tests_to_skip+=('TestDataLoader and test_proper_exit')
fi
#
# CUDA flaky tests, all package types
##############################################################################
if [[ "$cuda_ver" != 'cpu' ]]; then
#
# DistributedDataParallelTest
# All of these seem to fail
tests_to_skip+=('DistributedDataParallelTest')
#
# RendezvousEnvTest
# Traceback (most recent call last):
# File "test_c10d.py", line 201, in test_nominal
# store0, rank0, size0 = next(gen0)
# File "/opt/python/cp36-cp36m/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 131, in _env_rendezvous_handler
# store = TCPStore(master_addr, master_port, start_daemon)
# RuntimeError: Address already in use
tests_to_skip+=('RendezvousEnvTest and test_nominal')
#
# TestCppExtension
#
# Traceback (most recent call last):
# File "test_cpp_extensions.py", line 134, in test_jit_cudnn_extension
# with_cuda=True)
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 552, in load
# with_cuda)
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 729, in _jit_compile
# return _import_module_from_library(name, build_directory)
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 867, in _import_module_from_library
# return imp.load_module(module_name, file, path, description)
# File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 243, in load_module
# return load_dynamic(name, filename, file)
# File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 343, in load_dynamic
# return _load(spec)
# File "<frozen importlib._bootstrap>", line 693, in _load
# File "<frozen importlib._bootstrap>", line 666, in _load_unlocked
# File "<frozen importlib._bootstrap>", line 577, in module_from_spec
# File "<frozen importlib._bootstrap_external>", line 938, in create_module
# File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
# ImportError: libcudnn.so.7: cannot open shared object file: No such file or directory
tests_to_skip+=('TestCppExtension and test_jit_cudnn_extension')
#
# TestCuda
#
# 3.7_cu80
# RuntimeError: CUDA error: out of memory
tests_to_skip+=('TestCuda and test_arithmetic_large_tensor')
# 3.7_cu80
# RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch-nightly_1538097262541/work/aten/src/THC/THCTensorCopy.cu:205
tests_to_skip+=('TestCuda and test_autogpu')
#
# TestDistBackend
#
# Traceback (most recent call last):
# File "test_thd_distributed.py", line 1046, in wrapper
# self._join_and_reduce(fn)
# File "test_thd_distributed.py", line 1108, in _join_and_reduce
# self.assertEqual(p.exitcode, first_process.exitcode)
# File "/pytorch/test/common.py", line 399, in assertEqual
# super(TestCase, self).assertEqual(x, y, message)
# AssertionError: None != 77 :
tests_to_skip+=('TestDistBackend and test_all_gather_group')
tests_to_skip+=('TestDistBackend and test_all_reduce_group_max')
tests_to_skip+=('TestDistBackend and test_all_reduce_group_min')
tests_to_skip+=('TestDistBackend and test_all_reduce_group_sum')
tests_to_skip+=('TestDistBackend and test_all_reduce_group_product')
tests_to_skip+=('TestDistBackend and test_barrier_group')
tests_to_skip+=('TestDistBackend and test_broadcast_group')
# Traceback (most recent call last):
# File "test_thd_distributed.py", line 1046, in wrapper
# self._join_and_reduce(fn)
# File "test_thd_distributed.py", line 1108, in _join_and_reduce
# self.assertEqual(p.exitcode, first_process.exitcode)
# File "/pytorch/test/common.py", line 397, in assertEqual
# super(TestCase, self).assertLessEqual(abs(x - y), prec, message)
# AssertionError: 12 not less than or equal to 1e-05
tests_to_skip+=('TestDistBackend and test_barrier')
# Traceback (most recent call last):
# File "test_distributed.py", line 1267, in wrapper
# self._join_and_reduce(fn)
# File "test_distributed.py", line 1350, in _join_and_reduce
# self.assertEqual(p.exitcode, first_process.exitcode)
# File "/pytorch/test/common.py", line 399, in assertEqual
# super(TestCase, self).assertEqual(x, y, message)
# AssertionError: None != 1
tests_to_skip+=('TestDistBackend and test_broadcast')
# Memory leak very similar to all the conda ones below, but appears on manywheel
# 3.6m_cu80
# AssertionError: 1605632 not less than or equal to 1e-05 : __main__.TestEndToEndHybridFrontendModels.test_vae_cuda leaked 1605632 bytes CUDA memory on device 0
tests_to_skip+=('TestEndToEndHybridFrontendModels and test_vae_cuda')
# ________________________ TestNN.test_embedding_bag_cuda ________________________
#
# self = <test_nn.TestNN testMethod=test_embedding_bag_cuda>
# dtype = torch.float32
#
# @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
# @repeat_test_for_types(ALL_TENSORTYPES)
# @skipIfRocm
# def test_embedding_bag_cuda(self, dtype=torch.float):
# self._test_EmbeddingBag(True, 'sum', False, dtype)
# self._test_EmbeddingBag(True, 'mean', False, dtype)
# self._test_EmbeddingBag(True, 'max', False, dtype)
# if dtype != torch.half:
# # torch.cuda.sparse.HalfTensor is not enabled.
# self._test_EmbeddingBag(True, 'sum', True, dtype)
# > self._test_EmbeddingBag(True, 'mean', True, dtype)
#
# test_nn.py:2144:
# _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
# test_nn.py:2062: in _test_EmbeddingBag
# _test_vs_Embedding(N, D, B, L)
# test_nn.py:2059: in _test_vs_Embedding
# self.assertEqual(es_weight_grad, e.weight.grad, needed_prec)
# common.py:373: in assertEqual
# assertTensorsEqual(x, y)
# common.py:365: in assertTensorsEqual
# self.assertLessEqual(max_err, prec, message)
# E AssertionError: tensor(0.0000, device='cuda:0', dtype=torch.float32) not less than or equal to 2e-05 :
# 1 failed, 1202 passed, 19 skipped, 2 xfailed, 796 warnings in 1166.73 seconds =
# Traceback (most recent call last):
# File "test/run_test.py", line 391, in <module>
# main()
# File "test/run_test.py", line 383, in main
# raise RuntimeError(message)
tests_to_skip+=('TestNN and test_embedding_bag_cuda')
fi
##############################################################################
# MacOS specific flaky tests
##############################################################################
if [[ "$(uname)" == 'Darwin' ]]; then
# TestCppExtensions by default uses a temp folder in /tmp. This doesn't
# work for this Mac machine cause there is only one machine and /tmp is
# shared. (All the linux builds are on docker so have their own /tmp).
tests_to_skip+=('TestCppExtension')
fi
# Turn the set of tests to skip into an invocation that pytest understands:
# the entries are joined into "not (A) and not (B) and ..." for pytest's -k option.
excluded_tests_logic=''
for exclusion in "${tests_to_skip[@]}"; do
    if [[ -z "$excluded_tests_logic" ]]; then
        # Only true for i==0
        excluded_tests_logic="not ($exclusion)"
    else
        excluded_tests_logic="$excluded_tests_logic and not ($exclusion)"
    fi
done
##############################################################################
# Run the tests
##############################################################################
echo
echo "$(date) :: Calling 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
# The -k expression must reach pytest as ONE argument. Passing "'" "$expr" "'"
# hands pytest a lone quote character as the expression and the real
# expression as a stray positional argument. The exclusions array is quoted so
# each entry stays a single word.
python test/run_test.py -v -p pytest "${entire_file_exclusions[@]}" -- --disable-pytest-warnings -k "$excluded_tests_logic"
echo
echo "$(date) :: Finished 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
# cpp_extensions don't work with pytest, so we run them without pytest here,
# except there's a failure on CUDA builds (documented above), and
# cpp_extensions doesn't work on a shared mac machine (also documented above)
if [[ "$cuda_ver" == 'cpu' && "$(uname)" != 'Darwin' ]]; then
echo
echo "$(date) :: Calling 'python test/run_test.py -v -i cpp_extensions'"
python test/run_test.py -v -i cpp_extensions
echo
echo "$(date) :: Finished 'python test/run_test.py -v -i cpp_extensions'"
fi
# thd_distributed can run on Mac but not in pytest
if [[ "$(uname)" == 'Darwin' ]]; then
echo
echo "$(date) :: Calling 'python test/run_test.py -v -i thd_distributed'"
python test/run_test.py -v -i thd_distributed
echo
echo "$(date) :: Finished 'python test/run_test.py -v -i thd_distributed'"
fi

View File

@ -1,130 +0,0 @@
#!/usr/bin/env python3
import concurrent.futures
import distutils.sysconfig
import functools
import itertools
import os
import re
from pathlib import Path
from typing import Any
# We also check that there are [not] cxx11 symbols in libtorch
#
# To check whether it is using cxx11 ABI, check non-existence of symbol:
PRE_CXX11_SYMBOLS = (
"std::basic_string<",
"std::list",
)
# To check whether it is using pre-cxx11 ABI, check non-existence of symbol:
CXX11_SYMBOLS = (
"std::__cxx11::basic_string",
"std::__cxx11::list",
)
# NOTE: Checking the above symbols in all namespaces doesn't work, because
# devtoolset7 always produces some cxx11 symbols even if we build with old ABI,
# and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4.
# Instead, we *only* check the above symbols in the following namespaces:
LIBTORCH_NAMESPACE_LIST = (
"c10::",
"at::",
"caffe2::",
"torch::",
)
def _apply_libtorch_symbols(symbols):
return [
re.compile(f"{x}.*{y}")
for (x, y) in itertools.product(LIBTORCH_NAMESPACE_LIST, symbols)
]
LIBTORCH_CXX11_PATTERNS = _apply_libtorch_symbols(CXX11_SYMBOLS)
LIBTORCH_PRE_CXX11_PATTERNS = _apply_libtorch_symbols(PRE_CXX11_SYMBOLS)
@functools.lru_cache(100)
def get_symbols(lib: str) -> list[tuple[str, str, str]]:
    """Return the demangled symbol table of ``lib``.

    Runs ``nm <lib> | c++filt`` and splits every output line on its first two
    spaces, so each row holds up to three fields (address, type, name).
    Results are memoised per ``lib`` by ``functools.lru_cache``.

    Raises:
        subprocess.CalledProcessError: if the shell pipeline exits non-zero.
    """
    import shlex
    from subprocess import check_output

    # Quote the path for the shell: naive "..." wrapping breaks (or lets the
    # shell interpret) paths containing quotes, `$` or backticks.
    lines = check_output(f"nm {shlex.quote(str(lib))}|c++filt", shell=True)
    return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
    """Return the names of symbols in ``lib`` that match any of ``patterns``.

    Args:
        lib: path of the library whose symbols are read via ``get_symbols``.
        patterns: compiled regular expressions, applied with ``.match`` (i.e.
            anchored at the start of the symbol name).

    Returns:
        Matching symbol names in symbol-table order. Each symbol is reported
        at most once, even when several patterns match it.
    """

    def _grep_symbols(
        symbols: list[tuple[str, str, str]], patterns: list[Any]
    ) -> list[str]:
        # any() stops at the first matching pattern, so a symbol that matches
        # several patterns is not counted more than once.
        return [
            s_name
            for _s_addr, _s_type, s_name in symbols
            if any(pattern.match(s_name) for pattern in patterns)
        ]

    all_symbols = get_symbols(lib)
    num_workers = 32
    # Ceiling division: every symbol lands in exactly one of num_workers chunks.
    chunk_size = (len(all_symbols) + num_workers - 1) // num_workers

    def _get_symbols_chunk(i):
        return all_symbols[i * chunk_size : (i + 1) * chunk_size]

    with concurrent.futures.ThreadPoolExecutor(max_workers=num_workers) as executor:
        tasks = [
            executor.submit(_grep_symbols, _get_symbols_chunk(i), patterns)
            for i in range(num_workers)
        ]
        # Collect in submission order so the result follows symbol-table order.
        return list(itertools.chain.from_iterable(x.result() for x in tasks))
def check_lib_symbols_for_abi_correctness(lib: str, pre_cxx11_abi: bool = True) -> None:
    """Verify that ``lib`` only exposes libtorch symbols of the expected C++ ABI.

    Args:
        lib: path of the library to inspect.
        pre_cxx11_abi: ``True`` if the library is expected to use the
            pre-cxx11 ABI, ``False`` if it is expected to use the cxx11 ABI.

    Raises:
        RuntimeError: if symbols of the other ABI are present, if too few
            symbols of the expected ABI are found (fewer than 1000 pre-cxx11
            or fewer than 100 cxx11), or if a pre-cxx11 library references
            ``std::filesystem::recursive_directory_iterator``.
    """
    print(f"lib: {lib}")
    cxx11_hits = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
    pre_cxx11_hits = grep_symbols(lib, LIBTORCH_PRE_CXX11_PATTERNS)
    num_cxx11_symbols = len(cxx11_hits)
    num_pre_cxx11_symbols = len(pre_cxx11_hits)
    print(f"num_cxx11_symbols: {num_cxx11_symbols}")
    print(f"num_pre_cxx11_symbols: {num_pre_cxx11_symbols}")

    if not pre_cxx11_abi:
        # cxx11 ABI build: no pre-cxx11 symbols allowed, and enough cxx11 ones.
        if num_pre_cxx11_symbols > 0:
            raise RuntimeError(
                f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_hits[:100]}"
            )
        if num_cxx11_symbols < 100:
            raise RuntimeError("Didn't find enought cxx11 symbols")
        return

    # pre-cxx11 ABI build: the mirror-image checks.
    if num_cxx11_symbols > 0:
        raise RuntimeError(
            f"Found cxx11 symbols, but there shouldn't be any, see: {cxx11_hits[:100]}"
        )
    if num_pre_cxx11_symbols < 1000:
        raise RuntimeError("Didn't find enough pre-cxx11 symbols.")
    # Check for no recursive iterators, regression test for https://github.com/pytorch/pytorch/issues/133437
    recursive_iterator_hits = grep_symbols(
        lib, [re.compile("std::filesystem::recursive_directory_iterator.*")]
    )
    if len(recursive_iterator_hits) > 0:
        raise RuntimeError(
            f"recursive_directory_iterator in used pre-CXX11 binaries, see; {recursive_iterator_hits}"
        )
def main() -> None:
    """Locate ``libtorch_cpu.so`` and check its symbols for ABI correctness.

    The install root is taken from the ``install_root`` environment variable
    when set; otherwise it is the current directory for ``PACKAGE_TYPE ==
    "libtorch"`` and the ``torch`` package directory inside the Python library
    path for everything else. The pre-cxx11 ABI is expected unless
    ``DESIRED_DEVTOOLSET`` contains ``"cxx11-abi"``.
    """
    if "install_root" in os.environ:
        install_root = Path(os.getenv("install_root"))  # noqa: SIM112
    elif os.getenv("PACKAGE_TYPE") == "libtorch":
        install_root = Path(os.getcwd())
    else:
        install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"

    expects_pre_cxx11 = "cxx11-abi" not in os.getenv("DESIRED_DEVTOOLSET", "")
    check_lib_symbols_for_abi_correctness(
        install_root / "lib" / "libtorch_cpu.so", expects_pre_cxx11
    )
if __name__ == "__main__":
main()

View File

@ -1,205 +0,0 @@
import argparse
from torchvision import datasets, transforms
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
class Net(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, max-pooling, two linear layers.

    ``fc1`` takes 9216 features, which is what the conv/pool stack produces
    for single-channel 28x28 inputs (64 channels x 12 x 12). The output is a
    10-way log-softmax.
    """

    def __init__(self):
        super(Net, self).__init__()  # noqa: UP008
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Return per-class log-probabilities for the batch ``x``."""
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        features = self.dropout1(F.max_pool2d(features, 2))
        hidden = F.relu(self.fc1(torch.flatten(features, 1)))
        logits = self.fc2(self.dropout2(hidden))
        return F.log_softmax(logits, dim=1)
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one training epoch of ``model`` over ``train_loader``.

    Each batch is moved to ``device`` and optimised against the negative
    log-likelihood loss. Progress is printed every ``args.log_interval``
    batches; with ``args.dry_run`` set, the epoch stops after the first
    logged batch.
    """
    model.train()
    for batch_idx, batch in enumerate(train_loader):
        data, target = batch
        data = data.to(device)
        target = target.to(device)

        optimizer.zero_grad()
        loss = F.nll_loss(model(data), target)
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue
        seen = batch_idx * len(data)
        total = len(train_loader.dataset)
        percent = 100. * batch_idx / len(train_loader)
        print(
            f"Train Epoch: {epoch} [{seen}/{total} ({percent:.0f}%)]\tLoss: {loss.item():.6f}"
        )
        if args.dry_run:
            break
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    Runs in eval mode with gradients disabled. The loss is the summed
    negative log-likelihood divided by the dataset size; a prediction is the
    arg-max of the model output.
    """
    model.eval()
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            output = model(data)
            # sum up batch loss
            total_loss += F.nll_loss(output, target, reduction="sum").item()
            # get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)
            num_correct += pred.eq(target.view_as(pred)).sum().item()

    num_samples = len(test_loader.dataset)
    total_loss /= num_samples
    print(
        f"\nTest set: Average loss: {total_loss:.4f}, Accuracy: {num_correct}/{num_samples} ({100. * num_correct / num_samples:.0f}%)\n"  # noqa: B950
    )
def timed(fn):
    """Call ``fn()`` and time it with CUDA events.

    Returns:
        ``(result, elapsed)`` where ``result`` is the return value of ``fn``
        and ``elapsed`` is ``start.elapsed_time(end) / 1000`` (seconds,
        assuming ``elapsed_time`` reports milliseconds as documented by torch).
    """
    begin = torch.cuda.Event(enable_timing=True)
    finish = torch.cuda.Event(enable_timing=True)

    begin.record()
    outcome = fn()
    finish.record()
    # Wait for queued GPU work so both events have completed before reading them.
    torch.cuda.synchronize()

    elapsed = begin.elapsed_time(finish) / 1000
    return outcome, elapsed
def _parse_args():
    """Build the command-line parser for the MNIST example and parse the arguments."""
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=4,
        metavar="N",
        # The help text previously advertised 14, which did not match the default.
        help="number of epochs to train (default: 4)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument(
        "--no-cuda", action="store_true", default=False, help="disables CUDA training"
    )
    parser.add_argument(
        "--no-mps",
        action="store_true",
        default=False,
        help="disables macOS GPU training",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="quickly check a single pass",
    )
    parser.add_argument(
        "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=100,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    return parser.parse_args()


def main():
    """Train and evaluate the MNIST ``Net`` through ``torch.compile``.

    Parses the command line, selects the device (CUDA, then MPS, then CPU),
    builds the MNIST data loaders, compiles the model with
    ``mode="max-autotune"`` and then, for every epoch, prints the timed
    training and evaluation durations and steps the LR scheduler. With
    ``--save-model`` the compiled model's state dict is written to
    ``mnist_cnn.pt``.
    """
    # Training settings
    args = _parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    use_mps = not args.no_mps and torch.backends.mps.is_available()

    torch.manual_seed(args.seed)
    torch.backends.cuda.matmul.allow_tf32 = True

    if use_cuda:
        device = torch.device("cuda")
    elif use_mps:
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    train_kwargs = {"batch_size": args.batch_size}
    test_kwargs = {"batch_size": args.test_batch_size}
    if use_cuda:
        # Extra DataLoader options applied to both loaders when running on CUDA.
        cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    opt_model = torch.compile(model, mode="max-autotune")
    optimizer = optim.Adadelta(opt_model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        print(
            f"Training Time: {timed(lambda: train(args, opt_model, device, train_loader, optimizer, epoch))[1]}"
        )
        print(
            f"Evaluation Time: {timed(lambda: test(opt_model, device, test_loader))[1]}"
        )
        scheduler.step()

    if args.save_model:
        torch.save(opt_model.state_dict(), "mnist_cnn.pt")
if __name__ == "__main__":
main()

View File

@ -1,422 +0,0 @@
import argparse
import importlib
import json
import os
import re
import subprocess
import sys
from pathlib import Path
from tempfile import NamedTemporaryFile
import torch
import torch._dynamo
import torch.nn as nn
import torch.nn.functional as F
# Expected GPU architecture version: MATRIX_GPU_ARCH_VERSION when set,
# otherwise GPU_ARCH_VERSION as a fallback (None when neither is set).
gpu_arch_ver = os.environ.get(
    "MATRIX_GPU_ARCH_VERSION", os.getenv("GPU_ARCH_VERSION")
)
# Remaining build-matrix settings, all read from the environment.
gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")
channel = os.getenv("MATRIX_CHANNEL")
package_type = os.getenv("MATRIX_PACKAGE_TYPE")
target_os = os.getenv("TARGET_OS", sys.platform)
# Directory three levels above this file; release_matrix.json is looked up here.
BASE_DIR = Path(__file__).parent.parent.parent

is_cuda_system = gpu_arch_type == "cuda"
# Age limit, in days, used by the nightly date checks.
NIGHTLY_ALLOWED_DELTA = 3

# Companion packages validated alongside torch when the package is "all".
# "extension" names the attribute that exposes _check_cuda_version().
MODULES = [
    {
        "name": "torchvision",
        "repo": "https://github.com/pytorch/vision.git",
        "smoke_test": "./vision/test/smoke_test.py",
        "extension": "extension",
        "repo_name": "vision",
    },
    {
        "name": "torchaudio",
        "repo": "https://github.com/pytorch/audio.git",
        "smoke_test": "./audio/test/smoke_test/smoke_test.py --no-ffmpeg",
        "extension": "_extension",
        "repo_name": "audio",
    },
]
class Net(nn.Module):
    """Minimal conv net used as a smoke-test model.

    Two 3x3 convolutions, a 2x2 max-pool and a single linear layer that maps
    9216 flattened features to one output value per sample.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.fc1 = nn.Linear(9216, 1)

    def forward(self, x):
        """Return the linear head's output for the batch ``x``."""
        features = self.conv2(self.conv1(x))
        pooled = F.max_pool2d(features, 2)
        return self.fc1(torch.flatten(pooled, 1))
def load_json_from_basedir(filename: str):
    """Load and return the JSON document ``BASE_DIR / filename``.

    Raises:
        ImportError: if the file does not exist or does not contain valid
            JSON (the original exception is chained as the cause).
    """
    json_path = BASE_DIR / filename
    try:
        return json.loads(json_path.read_text())
    except FileNotFoundError as exc:
        raise ImportError(f"File {filename} not found error: {exc.strerror}") from exc
    except json.JSONDecodeError as exc:
        raise ImportError(f"Invalid JSON {filename}") from exc
def read_release_matrix():
    """Return the parsed contents of ``release_matrix.json`` from ``BASE_DIR``."""
    release_matrix = load_json_from_basedir("release_matrix.json")
    return release_matrix
def test_numpy():
    """Smoke-check numpy interop by building a torch tensor from a numpy array."""
    import numpy as np

    torch.tensor(np.arange(5))
def check_version(package: str) -> None:
    """Validate the installed torch (and companion package) versions.

    The expected torch version comes from the release matrix when
    ``RELEASE_VERSION`` is set, otherwise from ``MATRIX_STABLE_VERSION``.
    Nightly channels are validated by build date instead. When neither
    applies, the check is skipped with a message.

    Raises:
        RuntimeError: if torch, or any entry of ``MODULES`` when validating a
            release with ``package == "all"``, does not start with the
            expected version.
    """
    release_version = os.getenv("RELEASE_VERSION")
    # if release_version is specified, use it to validate the packages
    if release_version:
        release_matrix = read_release_matrix()
        stable_version = release_matrix["torch"]
    else:
        stable_version = os.getenv("MATRIX_STABLE_VERSION")

    # only makes sense to check nightly package where dates are known
    if channel == "nightly":
        check_nightly_binaries_date(package)
        return

    if stable_version is None:
        print(f"Skip version check for channel {channel} as stable version is None")
        return

    if not torch.__version__.startswith(stable_version):
        raise RuntimeError(
            f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
        )

    # Companion packages are only validated against the release matrix.
    if not (release_version and package == "all"):
        return

    for module in MODULES:
        module_version = importlib.import_module(module["name"]).__version__
        if not module_version.startswith(release_matrix[module["name"]]):
            raise RuntimeError(
                f"{module['name']} version mismatch, expected: "
                f"{release_matrix[module['name']]} for channel {channel}. But its {module_version}"
            )
        print(
            f"{module['name']} version actual: {module_version} expected: "
            f"{release_matrix[module['name']]} for channel {channel}."
        )
def check_nightly_binaries_date(package: str) -> None:
    """Fail when the installed nightly binaries are too old.

    The build date is taken from the ``devYYYYMMDD`` tag in the version
    string. torch fails at ``NIGHTLY_ALLOWED_DELTA`` days or more; with
    ``package == "all"`` every entry of ``MODULES`` is checked too and fails
    at more than ``NIGHTLY_ALLOWED_DELTA`` days.

    Raises:
        RuntimeError: if torch or a companion package exceeds its age limit.
    """
    from datetime import datetime

    format_dt = "%Y%m%d"

    date_t_str = re.findall("dev\\d+", torch.__version__)
    # Drop the leading "dev" to obtain the YYYYMMDD part.
    torch_built = datetime.strptime(date_t_str[0][3:], format_dt)
    torch_age = datetime.now() - torch_built
    if torch_age.days >= NIGHTLY_ALLOWED_DELTA:
        raise RuntimeError(
            f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!"
        )

    if package != "all":
        return

    for module in MODULES:
        module_version = importlib.import_module(module["name"]).__version__
        date_m_str = re.findall("dev\\d+", module_version)
        date_m_delta = datetime.now() - datetime.strptime(
            date_m_str[0][3:], format_dt
        )
        print(f"Nightly date check for {module['name']} version {module_version}")
        if date_m_delta.days > NIGHTLY_ALLOWED_DELTA:
            raise RuntimeError(
                f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {date_m_delta}"
            )
def test_cuda_runtime_errors_captured() -> None:
    """Check that a failing CUDA-side assert surfaces as a ``RuntimeError``.

    ``torch._assert_async`` is called on zero-valued CUDA tensors; the
    resulting ``RuntimeError`` must mention "CUDA".

    Raises:
        RuntimeError: if no error is raised at all, or (re-raised) if the
            error raised does not mention "CUDA".
    """
    try:
        print("Testing test_cuda_runtime_errors_captured")
        torch._assert_async(torch.tensor(0, device="cuda"))
        torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
    except RuntimeError as e:
        if not re.search("CUDA", f"{e}"):
            raise e
        print(f"Caught CUDA exception with success: {e}")
        return

    # Reaching this point means neither assert raised.
    raise RuntimeError("Expected CUDA RuntimeError but have not received!")
def test_cuda_gds_errors_captured() -> None:
    """Check that opening a GDS file handle fails with the expected error.

    Skipped (with a message) for CUDA versions below 12.6. Otherwise
    ``torch.cuda.gds.GdsFile`` is opened on a temporary file and must raise a
    ``RuntimeError`` containing "cuFileHandleRegister failed".

    Raises:
        RuntimeError: if no error is raised, or (re-raised) if a different
            ``RuntimeError`` is raised.
    """
    cuda_parts = torch.version.cuda.split(".")
    major_version = int(cuda_parts[0])
    minor_version = int(cuda_parts[1])
    if (major_version, minor_version) < (12, 6):
        print("CUDA version is not supported for GDS smoke test")
        return

    try:
        print("Testing test_cuda_gds_errors_captured")
        with NamedTemporaryFile() as f:
            torch.cuda.gds.GdsFile(f.name, os.O_CREAT | os.O_RDWR)
    except RuntimeError as e:
        expected_error = "cuFileHandleRegister failed"
        if not re.search(expected_error, f"{e}"):
            raise e
        print(f"Caught CUDA exception with success: {e}")
        return

    # Reaching this point means GdsFile did not raise.
    raise RuntimeError(
        "Expected cuFileHandleRegister failed RuntimeError but have not received!"
    )
def smoke_test_cuda(
    package: str, runtime_error_check: str, torch_compile_check: str
) -> None:
    """Validate the CUDA side of the installed binaries.

    Args:
        package: ``"all"`` to also report the CUDA version of every entry in
            ``MODULES``.
        runtime_error_check: ``"enabled"`` to run
            ``test_cuda_runtime_errors_captured`` when CUDA is available.
        torch_compile_check: ``"enabled"`` to run ``smoke_test_compile`` on
            supported platforms and Python versions.

    Raises:
        RuntimeError: if a CUDA system has no usable CUDA, or the loaded CUDA
            version differs from ``gpu_arch_ver``.
    """
    cuda_available = torch.cuda.is_available()
    if is_cuda_system and not cuda_available:
        raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")

    if package == "all" and is_cuda_system:
        for module in MODULES:
            imported_module = importlib.import_module(module["name"])
            # TBD: vision should move its extension module to a private name so
            # that it becomes _extension as well.
            extension = (
                imported_module.extension
                if module["extension"] == "extension"
                else imported_module._extension
            )
            version = extension._check_cuda_version()
            print(f"{module['name']} CUDA: {version}")

    # torch.compile is available on macos-arm64 and Linux for python 3.8-3.13
    compile_supported = sys.version_info < (3, 14, 0) and target_os in [
        "linux",
        "linux-aarch64",
        "macos-arm64",
        "darwin",
    ]
    if torch_compile_check == "enabled" and compile_supported:
        smoke_test_compile("cuda" if torch.cuda.is_available() else "cpu")

    if not torch.cuda.is_available():
        return

    if torch.version.cuda != gpu_arch_ver:
        raise RuntimeError(
            f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
        )
    print(f"torch cuda: {torch.version.cuda}")
    # todo add cudnn version validation
    print(f"torch cudnn: {torch.backends.cudnn.version()}")
    print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")

    torch.cuda.init()
    print("CUDA initialized successfully")
    print(f"Number of CUDA devices: {torch.cuda.device_count()}")
    for i in range(torch.cuda.device_count()):
        print(f"Device {i}: {torch.cuda.get_device_name(i)}")

    # nccl is available only on Linux
    if sys.platform in ["linux", "linux2"]:
        print(f"torch nccl version: {torch.cuda.nccl.version()}")

    if runtime_error_check == "enabled":
        test_cuda_runtime_errors_captured()
def smoke_test_conv2d() -> None:
    """Smoke-test ``nn.Conv2d`` construction and forward passes.

    Builds three convolution variants on CPU and runs one forward pass. On a
    CUDA system (``is_cuda_system``) it additionally runs a small convolution
    under autocast and the dilated convolution in float16/float32/float64.

    Raises:
        AssertionError: if a module or a forward result is ``None``.
    """
    import torch.nn as nn

    print("Testing smoke_test_conv2d")
    # With square kernels and equal stride
    square_conv = nn.Conv2d(16, 33, 3, stride=2)
    assert square_conv is not None
    # non-square kernels and unequal stride and with padding
    padded_conv = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
    assert padded_conv is not None
    # non-square kernels and unequal stride and with padding and dilation
    basic_conv = nn.Conv2d(
        16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)
    )
    cpu_input = torch.randn(20, 16, 50, 100)
    cpu_output = basic_conv(cpu_input)
    assert cpu_output is not None

    if is_cuda_system:
        print("Testing smoke_test_conv2d with cuda")
        conv = nn.Conv2d(3, 3, 3).cuda()
        x = torch.randn(1, 3, 24, 24, device="cuda")
        # torch.cuda.amp.autocast() is deprecated in favour of the
        # device-generic torch.amp.autocast("cuda").
        with torch.amp.autocast("cuda"):
            out = conv(x)
        assert out is not None

        supported_dtypes = [torch.float16, torch.float32, torch.float64]
        for dtype in supported_dtypes:
            print(f"Testing smoke_test_conv2d with cuda for {dtype}")
            # Module.to()/.cuda() convert basic_conv in place and return it.
            conv = basic_conv.to(dtype).cuda()
            cuda_input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
            cuda_output = conv(cuda_input)
            assert cuda_output is not None
def test_linalg(device="cpu") -> None:
    """Smoke-test ``torch.linalg.svd`` on ``device``.

    Runs a reduced and a full SVD on a random 5x3 matrix and a reduced SVD on
    a random 7x5x3 batch, checking the factor shapes (unbatched case) and that
    the factors multiply back to the input. On "cuda" it additionally runs SVD
    on a larger batched tensor in float32 and float64.

    Args:
        device: torch device string the tensors are created on.

    Raises:
        AssertionError: if a factor has an unexpected shape.
        RuntimeError: if a reconstruction is further from the input than the
            allowed residual.
    """
    print(f"Testing smoke_test_linalg on {device}")

    # Loose bound on the 2-norm of (A - reconstruction). It is only meant to
    # catch grossly wrong results, not to measure precision.
    max_residual = 1e-3

    def check_reconstruction(original, rebuilt, label):
        residual = torch.dist(original, rebuilt).item()
        # Written as `not (<=)` so that a NaN residual is rejected as well.
        if not residual <= max_residual:
            raise RuntimeError(
                f"SVD reconstruction failed for {label} on {device}: "
                f"residual {residual} > {max_residual}"
            )

    A = torch.randn(5, 3, device=device)
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    assert (
        U.shape == A.shape
        and S.shape == torch.Size([3])
        and Vh.shape == torch.Size([3, 3])
    )
    check_reconstruction(A, U @ torch.diag(S) @ Vh, "reduced SVD")

    U, S, Vh = torch.linalg.svd(A)
    assert (
        U.shape == torch.Size([5, 5])
        and S.shape == torch.Size([3])
        and Vh.shape == torch.Size([3, 3])
    )
    # With full_matrices=True only the first 3 columns of U take part.
    check_reconstruction(A, U[:, :3] @ torch.diag(S) @ Vh, "full SVD")

    A = torch.randn(7, 5, 3, device=device)
    U, S, Vh = torch.linalg.svd(A, full_matrices=False)
    check_reconstruction(A, U @ torch.diag_embed(S) @ Vh, "batched SVD")

    if device == "cuda":
        supported_dtypes = [torch.float32, torch.float64]
        for dtype in supported_dtypes:
            print(f"Testing smoke_test_linalg with cuda for {dtype}")
            A = torch.randn(20, 16, 50, 100, device=device, dtype=dtype)
            torch.linalg.svd(A)
def smoke_test_compile(device: str = "cpu") -> None:
    """Smoke-test ``torch.compile`` on ``device``.

    Compiles a small element-wise function for float16/float32/float64 and
    compares it with eager execution, verifies on CPU that a vectorized ISA is
    picked, then compiles ``Net`` with ``mode="max-autotune"`` and runs one
    forward pass.

    Raises:
        RuntimeError: if no vectorized ISA is detected for the CPU.
    """

    def foo(x: torch.Tensor) -> torch.Tensor:
        return torch.sin(x) + torch.cos(x)

    for dtype in (torch.float16, torch.float32, torch.float64):
        print(f"Testing smoke_test_compile for {device} and {dtype}")
        sample = torch.rand(3, 3, device=device).type(dtype)
        eager_result = foo(sample)
        compiled_result = torch.compile(foo)(sample)
        torch.testing.assert_close(eager_result, compiled_result)

    # Check that SIMD were detected for the architecture
    if device == "cpu":
        from torch._inductor.codecache import pick_vec_isa

        isa = pick_vec_isa()
        if not isa:
            raise RuntimeError("Can't detect vectorized ISA for CPU")
        print(f"Picked CPU ISA {type(isa).__name__} bit width {isa.bit_width()}")

    # Reset torch dynamo since we are changing mode
    torch._dynamo.reset()
    dtype = torch.float32
    torch.set_float32_matmul_precision("high")
    print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}")
    batch = torch.rand(64, 1, 28, 28, device=device).type(torch.float32)
    model = Net().to(device=device)
    # Only the fact that compilation and the forward pass succeed matters;
    # the result itself is not inspected.
    torch.compile(model, mode="max-autotune")(batch)
def smoke_test_modules():
    """Run the smoke-test script of every ``MODULES`` entry that has a repo.

    For each such entry the repository is shallow-cloned into the current
    working directory unless ``<cwd>/<repo_name>`` already exists, then the
    entry's smoke-test command is executed and its output printed.

    Raises:
        RuntimeError: if the clone or the smoke-test command exits non-zero.
    """
    cwd = os.getcwd()
    for module in MODULES:
        if not module["repo"]:
            continue

        if not os.path.exists(f"{cwd}/{module['repo_name']}"):
            print(f"Path does not exist: {cwd}/{module['repo_name']}")
            try:
                subprocess.check_output(
                    f"git clone --depth 1 {module['repo']}",
                    stderr=subprocess.STDOUT,
                    shell=True,
                )
            except subprocess.CalledProcessError as exc:
                raise RuntimeError(
                    f"Cloning {module['repo']} FAIL: {exc.returncode} Output: {exc.output}"
                ) from exc

        # The interpreter is invoked as `python` on Windows, `python3` elsewhere.
        if target_os == "windows":
            smoke_test_command = f"python {module['smoke_test']}"
        else:
            smoke_test_command = f"python3 {module['smoke_test']}"

        try:
            output = subprocess.check_output(
                smoke_test_command,
                stderr=subprocess.STDOUT,
                shell=True,
                universal_newlines=True,
            )
        except subprocess.CalledProcessError as exc:
            raise RuntimeError(
                f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}"
            ) from exc

        # Only reached when the command succeeded (the handler always raises).
        print(f"Output: \n{output}\n")
def parse_args(argv: "list[str] | None" = None) -> argparse.Namespace:
    """Parse the smoke-test command line.

    Args:
        argv: argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the previous behaviour.

    Returns:
        Namespace with ``package`` ("all" or "torchonly"),
        ``runtime_error_check`` and ``torch_compile_check`` (each "enabled"
        or "disabled").
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--package",
        help="Package to include in smoke testing",
        type=str,
        choices=["all", "torchonly"],
        default="all",
    )
    parser.add_argument(
        "--runtime-error-check",
        help="Check that CUDA runtime errors are captured",
        type=str,
        choices=["enabled", "disabled"],
        default="enabled",
    )
    parser.add_argument(
        "--torch-compile-check",
        help="Check torch compile",
        type=str,
        choices=["enabled", "disabled"],
        default="enabled",
    )
    return parser.parse_args(argv)
def main() -> None:
    """Entry point: run every smoke check against the installed torch build.

    Raises:
        RuntimeError: if torch was built without OpenMP support (the
            individual checks may raise as well).
    """
    args = parse_args()
    package = args.package

    print(f"torch: {torch.__version__}")
    print(torch.__config__.parallel_info())

    # All PyTorch binary builds should be built with OpenMP
    if not torch.backends.openmp.is_available():
        raise RuntimeError("PyTorch must be built with OpenMP support")

    # Device-independent checks first.
    check_version(package)
    smoke_test_conv2d()
    test_linalg()
    test_numpy()

    if is_cuda_system:
        test_linalg("cuda")
        test_cuda_gds_errors_captured()

    if package == "all":
        smoke_test_modules()

    smoke_test_cuda(package, args.runtime_error_check, args.torch_compile_check)


if __name__ == "__main__":
    main()

View File

@ -4,7 +4,7 @@
# (This is set by default in the Docker images we build, so you don't
# need to set it yourself.
set -ex -o pipefail
set -ex
# Suppress ANSI color escape sequences
export TERM=vt100
@ -12,9 +12,9 @@ export TERM=vt100
# shellcheck source=./common.sh
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
# Do not change workspace permissions for ROCm and s390x CI jobs
# Do not change workspace permissions for ROCm CI jobs
# as it can leave workspace with bad permissions for cancelled jobs
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
cleanup_workspace() {
@ -46,9 +46,6 @@ BUILD_BIN_DIR="$BUILD_DIR"/bin
SHARD_NUMBER="${SHARD_NUMBER:=1}"
NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"
# enable debug asserts in serialization
export TORCH_SERIALIZATION_DEBUG=1
export VALGRIND=ON
# export TORCH_INDUCTOR_INSTALL_GXX=ON
if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
@ -89,13 +86,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
export VALGRIND=OFF
fi
if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
# There are additional warnings on s390x, maybe due to newer gcc.
# Skip this check for now
export VALGRIND=OFF
fi
if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then
# When rerunning disable tests, do not generate core dumps as it could consume
# the runner disk space when crashed tests are run multiple times. Running out
@ -139,7 +129,7 @@ if [[ "$TEST_CONFIG" == 'default' ]]; then
fi
if [[ "$TEST_CONFIG" == 'distributed' ]] && [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
export HIP_VISIBLE_DEVICES=0,1,2,3
export HIP_VISIBLE_DEVICES=0,1
fi
if [[ "$TEST_CONFIG" == 'slow' ]]; then
@ -163,8 +153,6 @@ elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
# setting PYTHON_TEST_EXTRA_OPTION
export PYTHON_TEST_EXTRA_OPTION="--xpu"
# Disable sccache for xpu test due to flaky issue https://github.com/pytorch/pytorch/issues/143585
sudo rm -rf /opt/cache
fi
if [[ "$TEST_CONFIG" == *crossref* ]]; then
@ -177,9 +165,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
# Print GPU info
rocminfo
rocminfo | grep -E 'Name:.*\sgfx|Marketing'
# for benchmarks/dynamo/check_accuracy.py, we need to put results in a rocm specific directory to avoid clashes with cuda
MAYBE_ROCM="rocm/"
fi
if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
@ -328,7 +313,6 @@ test_dynamo_wrapped_shard() {
--exclude-jit-executor \
--exclude-distributed-tests \
--exclude-torch-export-tests \
--exclude-aot-dispatch-tests \
--shard "$1" "$NUM_TEST_SHARDS" \
--verbose \
--upload-artifacts-while-running
@ -342,7 +326,7 @@ test_inductor_distributed() {
python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose
python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
python test/run_test.py -i distributed/tensor/test_dtensor_compile.py --verbose
python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
@ -395,32 +379,15 @@ test_inductor_aoti() {
CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
}
test_inductor_cpp_wrapper_shard() {
if [[ -z "$NUM_TEST_SHARDS" ]]; then
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
exit 1
fi
test_inductor_cpp_wrapper() {
export TORCHINDUCTOR_CPP_WRAPPER=1
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
if [[ "$1" -eq "2" ]]; then
# For now, manually put the opinfo tests in shard 2, and all other tests in
# shard 1. Test specific things triggering past bugs, for now.
python test/run_test.py \
--include inductor/test_torchinductor_opinfo \
-k 'linalg or to_sparse' \
--verbose
exit
fi
# Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor
# unit tests with cpp wrapper.
python test/run_test.py --include inductor/test_torchinductor.py --verbose
# Run certain inductor unit tests with cpp wrapper. In the end state, we
# should be able to run all the inductor unit tests with cpp_wrapper.
python test/run_test.py \
--include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \
--verbose
python test/run_test.py --inductor --include test_torch -k 'take' --verbose
# Run inductor benchmark tests with cpp wrapper.
# Skip benchmark tests if it's in rerun-disabled-mode.
@ -433,7 +400,7 @@ test_inductor_cpp_wrapper_shard() {
--output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_timm_training.csv"
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
@ -443,7 +410,7 @@ test_inductor_cpp_wrapper_shard() {
--bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_torchbench_inference.csv"
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
fi
}
@ -518,8 +485,6 @@ test_perf_for_dashboard() {
test_inductor_set_cpu_affinity
elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then
device=cuda_a10g
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
device=rocm
fi
for mode in "${modes[@]}"; do
@ -552,7 +517,7 @@ test_perf_for_dashboard() {
--dynamic-batch-only "$@" \
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv"
fi
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]]; then
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv"
@ -636,16 +601,16 @@ test_single_dynamo_benchmark() {
TEST_CONFIG=${TEST_CONFIG//_avx512/}
fi
python "benchmarks/dynamo/$suite.py" \
--ci --accuracy --timing --explain --print-compilation-time \
--ci --accuracy --timing --explain \
"${DYNAMO_BENCHMARK_FLAGS[@]}" \
"$@" "${partition_flags[@]}" \
--output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv"
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
python benchmarks/dynamo/check_graph_breaks.py \
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv"
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
fi
}
@ -668,7 +633,7 @@ test_inductor_halide() {
}
test_inductor_triton_cpu() {
python test/run_test.py --include inductor/test_triton_cpu_backend.py inductor/test_torchinductor_strided_blocks.py --verbose
python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
assert_git_not_dirty
}
@ -732,7 +697,7 @@ test_inductor_torchbench_smoketest_perf() {
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_huggingface_training.csv"
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv"
done
}
@ -928,20 +893,10 @@ test_libtorch_api() {
else
# Exclude IMethodTest that relies on torch::deploy, which will instead be run in test_deploy
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
# On s390x, pytorch is built without llvm.
# Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
# test fails with errors like:
# JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
# unknown file: Failure
# C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
fi
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
fi
# quantization is not fully supported on s390x yet
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* && "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then
# NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR
export CPP_TESTS_DIR="${BUILD_BIN_DIR}"
python test/run_test.py --cpp --verbose -i cpp/static_runtime_test
@ -1288,7 +1243,7 @@ EOF
}
test_bazel() {
set -e -o pipefail
set -e
# bazel test needs sccache setup.
# shellcheck source=./common-build.sh
@ -1439,7 +1394,7 @@ test_executorch() {
test_linux_aarch64() {
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops \
test_foreach test_reductions test_unary_ufuncs \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
# Dynamo tests
@ -1457,7 +1412,7 @@ test_linux_aarch64() {
inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
inductor/test_triton_cpu_backend inductor/test_triton_extension_backend inductor/test_mkldnn_pattern_matcher inductor/test_cpu_cpp_wrapper \
inductor/test_triton_cpu_backend inductor/test_triton_extension_backend \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
}
@ -1466,9 +1421,9 @@ if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-baze
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then
# Install numpy-2.0.2 and compatible scipy & numba versions
python -mpip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0
python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py
# Install numpy-2.0.2 and test inductor tracing
python -mpip install --pre numpy==2.0.2
python test/run_test.py --include dynamo/test_unspec.py
elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
test_linux_aarch64
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
@ -1542,7 +1497,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
install_torchaudio cuda
install_torchvision
checkout_install_torchbench hf_T5 llama moco
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
install_torchvision
test_inductor_shard "${SHARD_NUMBER}"

View File

@ -1,26 +0,0 @@
# Builds simple-torch-test.cpp against an installed libtorch and links the
# CUDA toolkit libraries and cuDNN explicitly.

# The FindCUDAToolkit module used below was introduced in CMake 3.17.
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)
project(simple-torch-test)

find_package(Torch REQUIRED)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")

add_executable(simple-torch-test simple-torch-test.cpp)
target_include_directories(simple-torch-test PRIVATE ${TORCH_INCLUDE_DIRS})
target_link_libraries(simple-torch-test "${TORCH_LIBRARIES}")
set_property(TARGET simple-torch-test PROPERTY CXX_STANDARD 17)

# REQUIRED: the CUDA:: imported targets linked below only exist when the
# toolkit was found, so fail here with a clear message instead.
find_package(CUDAToolkit 11.8 REQUIRED)
target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse CUDA::cublas CUDA::cusolver)

find_library(CUDNN_LIBRARY NAMES cudnn)
if(NOT CUDNN_LIBRARY)
  # Otherwise the NOTFOUND value would be passed on to the linker line.
  message(FATAL_ERROR "cuDNN library (cudnn) was not found")
endif()
target_link_libraries(simple-torch-test ${CUDNN_LIBRARY} )

if(MSVC)
  # Copy the cuDNN / NVTX runtime DLLs next to the executable after building.
  file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll")
  message("dlls to copy " ${TORCH_DLLS})
  add_custom_command(TARGET simple-torch-test
                     POST_BUILD
                     COMMAND ${CMAKE_COMMAND} -E copy_if_different
                     ${TORCH_DLLS}
                     $<TARGET_FILE_DIR:simple-torch-test>)
endif(MSVC)

View File

@ -1,15 +0,0 @@
// Smoke test for a CUDA libtorch build: creates a tensor on the CUDA device
// and checks that MAGMA and cuDNN are reported as available.
#include <iostream>  // std::cout / std::endl are used directly below

#include <torch/torch.h>

int main(int argc, const char* argv[]) {
  std::cout << "Checking that CUDA archs are setup correctly" << std::endl;
  TORCH_CHECK(torch::rand({ 3, 5 }, torch::Device(torch::kCUDA)).defined(), "CUDA archs are not setup correctly");

  // These have to run after CUDA is initialized
  std::cout << "Checking that magma is available" << std::endl;
  TORCH_CHECK(torch::hasMAGMA(), "MAGMA is not available");

  std::cout << "Checking that CuDNN is available" << std::endl;
  TORCH_CHECK(torch::cuda::cudnn_is_available(), "CuDNN is not available");
  return 0;
}

View File

@ -1,6 +0,0 @@
// Smoke test: checks that this libtorch build reports MKL support.
#include <torch/torch.h>
int main(int argc, const char* argv[]) {
// Fails with the given message when torch::hasMKL() is false.
TORCH_CHECK(torch::hasMKL(), "MKL is not available");
return 0;
}

View File

@ -1,7 +0,0 @@
// Smoke test: checks that this libtorch build reports XNNPACK support.
#include <ATen/ATen.h>
#include <torch/torch.h>
int main(int argc, const char* argv[]) {
// Fails with the given message when the global ATen context reports that
// XNNPACK is not available.
TORCH_CHECK(at::globalContext().isXNNPACKAvailable(), "XNNPACK is not available");
return 0;
}

View File

@ -1,38 +0,0 @@
r"""
It's used to check basic CNN features with cuda.
For example, it would throw exception if some components are missing
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class SimpleCNN(nn.Module):
    """Minimal conv -> ReLU -> max-pool network used by this check."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 1, 3)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, inputs):
        activated = F.relu(self.conv(inputs))
        # view(1) requires the pooled result to hold exactly one element,
        # which is the case for the (1, 1, 5, 5) input used below.
        return self.pool(activated).view(1)


# Mock one infer
device = torch.device("cuda:0")
net = SimpleCNN().to(device)
net_inputs = torch.rand((1, 1, 5, 5), device=device)
outputs = net(net_inputs)
print(outputs)

# Mock one step training
criterion = nn.MSELoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.1)
label = torch.full((1,), 1.0, dtype=torch.float, device=device)
loss = criterion(outputs, label)
loss.backward()
optimizer.step()

View File

@ -1,41 +0,0 @@
r"""
It's used to check basic CNN features with cpu-only.
For example, it would throw exception if some components are missing
"""

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class SimpleCNN(nn.Module):
    """Minimal conv -> ReLU -> max-pool network used by this check."""

    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(1, 1, 3)
        self.pool = nn.MaxPool2d(2, 2)

    def forward(self, inputs):
        output = self.pool(F.relu(self.conv(inputs)))
        # view(1) requires the pooled result to hold exactly one element,
        # which is the case for the (1, 1, 5, 5) input used below.
        output = output.view(1)
        return output


try:
    # Mock one infer
    net = SimpleCNN()
    net_inputs = torch.rand((1, 1, 5, 5))
    outputs = net(net_inputs)
    print(outputs)

    criterion = nn.MSELoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.1)

    # Mock one step training
    label = torch.full((1,), 1.0, dtype=torch.float)
    loss = criterion(outputs, label)
    loss.backward()
    optimizer.step()
except Exception as e:
    print(f"An error occurred: {e}")
    # Re-raise: swallowing the error would make this check exit successfully
    # even though the build is broken.
    raise

View File

@ -1,14 +0,0 @@
r"""
It's used to check basic rnn features with cuda.
For example, it would throw an exception if some components are missing
"""
import torch
import torch.nn as nn
device = torch.device("cuda:0")
# nn.RNN(input_size=10, hidden_size=20, num_layers=2), moved to the GPU
rnn = nn.RNN(10, 20, 2).to(device)
inputs = torch.randn(5, 3, 10).to(device)
# One initial hidden state of size 20 per layer (2) and per batch item (3)
h0 = torch.randn(2, 3, 20).to(device)
# A single forward pass; the results are not inspected, the call only has to succeed
output, hn = rnn(inputs, h0)

View File

@ -1,13 +0,0 @@
r"""
Checks basic rnn features with cpu-only.
For example, it would throw an exception if some components are missing
"""

import torch
import torch.nn as nn

# Two stacked RNN layers mapping 10 input features to 20 hidden features.
rnn = nn.RNN(input_size=10, hidden_size=20, num_layers=2)

# Random input and initial hidden state; with the default batch_first=False
# the input is laid out as (seq_len, batch, input_size).
sequence = torch.randn(5, 3, 10)
initial_hidden = torch.randn(2, 3, 20)

# A single forward pass; it only has to succeed.
output, hn = rnn(sequence, initial_hidden)

View File

@ -1,6 +0,0 @@
// Minimal program that only needs to compile and link against libtorch and
// emit one warning at run time.
#include <torch/torch.h>
int main(int argc, const char* argv[]) {
// Emits the message through the TORCH_WARN macro.
TORCH_WARN("Simple test passed!");
return 0;
}

View File

@ -38,7 +38,7 @@ if [[ $PYLONG_API_CHECK == 0 ]]; then
echo "PyLong_AsUnsignedLong -> THPUtils_unpackUInt32 / THPUtils_unpackUInt64"
exit 1
fi
set -ex -o pipefail
set -ex
"$SCRIPT_HELPERS_DIR"/build_pytorch.bat

View File

@ -26,8 +26,7 @@ if not errorlevel 0 goto fail
if "%USE_XPU%"=="1" (
:: Install xpu support packages
set CUDA_VERSION=xpu
call %SCRIPT_HELPERS_DIR%\..\windows\internal\xpu_install.bat
call %INSTALLER_DIR%\install_xpu.bat
if errorlevel 1 exit /b 1
)

View File

@ -2,14 +2,6 @@
REM Description: Install Intel Support Packages on Windows
REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
if not "%CUDA_VERSION%" == "xpu" (
echo Skipping for non XPU builds
exit /b 0
)
set SRC_DIR=%NIGHTLIES_PYTORCH_ROOT%
if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
set XPU_INSTALL_MODE=%~1
if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
if "%XPU_INSTALL_MODE%"=="bundle" goto xpu_bundle_install_start
@ -120,14 +112,3 @@ if errorlevel 1 exit /b 1
del xpu_extra.exe
:xpu_install_end
if not "%XPU_ENABLE_KINETO%"=="1" goto install_end
:: Install Level Zero SDK
set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip
curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip"
echo "Installing level zero SDK..."
7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero"
set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%"
del "%SRC_DIR%\temp_build\level_zero_sdk.zip"
:install_end

View File

@ -1,5 +1,5 @@
#!/bin/bash
set -ex -o pipefail
set -ex
SCRIPT_PARENT_DIR=$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )
# shellcheck source=./common.sh
@ -18,9 +18,6 @@ export PYTORCH_FINAL_PACKAGE_DIR="${PYTORCH_FINAL_PACKAGE_DIR:-/c/w/build-result
PYTORCH_FINAL_PACKAGE_DIR_WIN=$(cygpath -w "${PYTORCH_FINAL_PACKAGE_DIR}")
export PYTORCH_FINAL_PACKAGE_DIR_WIN
# enable debug asserts in serialization
export TORCH_SERIALIZATION_DEBUG=1
mkdir -p "$TMP_DIR"/build/torch
export SCRIPT_HELPERS_DIR=$SCRIPT_PARENT_DIR/win-test-helpers
@ -38,13 +35,13 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
fi
# TODO: Move both of them to Windows AMI
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0 pytest-subtests==0.13.1
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0
# Install Z3 optional dependency for Windows builds.
python -m pip install z3-solver==4.12.2.0
# Install tlparse for test\dynamo\test_structured_trace.py UTs.
python -m pip install tlparse==0.3.30
python -m pip install tlparse==0.3.25
# Install parameterized
python -m pip install parameterized==0.8.1

View File

@ -1,31 +0,0 @@
@echo off
:: Downloads and silently installs ARM Performance Libraries 24.10.
:: Reads DOWNLOADS_DIR and DEPENDENCIES_DIR; appends to the files named by
:: GITHUB_ENV and GITHUB_PATH.
echo Dependency ARM Performance Libraries (APL) installation started.
:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
:: Set download URL for the ARM Performance Libraries (APL)
set DOWNLOAD_URL="https://developer.arm.com/-/cdn-downloads/permalink/Arm-Performance-Libraries/Version_24.10/arm-performance-libraries_24.10_Windows.msi"
set INSTALLER_FILE=%DOWNLOADS_DIR%\arm-performance-libraries.msi
:: Download installer
:: NOTE(review): the curl exit code is not checked; a failed download only
:: surfaces through the msiexec check below.
echo Downloading ARM Performance Libraries (APL)...
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install ARM Performance Libraries (APL)
echo Installing ARM Performance Libraries (APL)...
msiexec /i "%INSTALLER_FILE%" /qn /norestart ACCEPT_EULA=1 INSTALLFOLDER="%DEPENDENCIES_DIR%"
:: Check if installation was successful
if %errorlevel% neq 0 (
echo "Failed to install ARM Performance Libraries (APL) components. (exitcode = %errorlevel%)"
exit /b 1
)
:: Add to environment
:: ARMPL_DIR is exported through GITHUB_ENV and the bin folder through GITHUB_PATH
echo ARMPL_DIR=%DEPENDENCIES_DIR%\armpl_24.10\>> %GITHUB_ENV%
echo %DEPENDENCIES_DIR%\armpl_24.10\bin\>> %GITHUB_PATH%
echo Dependency ARM Performance Libraries (APL) installation finished.

View File

@ -1,49 +0,0 @@
@echo off
:: Downloads the Visual Studio Build Tools bootstrapper and installs the C++
:: workload components (ARM64 and x86/x64 toolsets, ASAN, CMake project
:: support, Windows 11 SDK 22621) into %DEPENDENCIES_DIR%\VSBuildTools.
:: Reads DOWNLOADS_DIR, DEPENDENCIES_DIR and MSVC_VERSION.
echo Dependency MSVC Build Tools with C++ with ARM64/ARM64EC components installation started.
:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir "%DOWNLOADS_DIR%"
if not exist "%DEPENDENCIES_DIR%" mkdir "%DEPENDENCIES_DIR%"
:: Set download URL for the Visual Studio Installer
set DOWNLOAD_URL=https://aka.ms/vs/17/release/vs_BuildTools.exe
set INSTALLER_FILE=%DOWNLOADS_DIR%\vs_BuildTools.exe
:: Download installer
echo Downloading Visual Studio Build Tools with C++ installer...
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install the Visual Studio Build Tools with C++ components
:: MSVC_VERSION selects the toolset: "latest", "14.40" or "14.36".
:: NOTE(review): for any other value (including unset) no installer is run and
:: the errorlevel check below then tests the preceding command instead, so the
:: script reports success without installing anything - confirm whether that
:: is intended.
echo Installing Visual Studio Build Tools with C++ components...
echo Installing MSVC %MSVC_VERSION%
if "%MSVC_VERSION%" == "latest" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.Tools.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.Tools.x86.x64
) else if "%MSVC_VERSION%" == "14.40" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.14.40.17.10.x86.x64
) else if "%MSVC_VERSION%" == "14.36" (
"%INSTALLER_FILE%" --norestart --nocache --quiet --wait --installPath "%DEPENDENCIES_DIR%\VSBuildTools" ^
--add Microsoft.VisualStudio.Component.Windows11SDK.22621 ^
--add Microsoft.VisualStudio.Component.VC.ASAN ^
--add Microsoft.VisualStudio.Component.VC.CMake.Project ^
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.ARM64 ^
--add Microsoft.VisualStudio.Component.VC.14.36.17.6.x86.x64
)
:: Check if installation was successful
if %errorlevel% neq 0 (
echo "Failed to install Visual Studio Build Tools with C++ components. (exitcode = %errorlevel%)"
exit /b 1
)
echo Dependency Visual Studio Build Tools with C++ installation finished.

View File

@ -1,37 +0,0 @@
:: we need to install newer version of Git manually as "-submodules" function is not supported in the default version of runner.
:: Reads DOWNLOADS_DIR and DEPENDENCIES_DIR; appends to the file named by GITHUB_PATH.
@echo off

echo Dependency Git installation started.

:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%

:: Set download URL for the Git
set DOWNLOAD_URL="https://github.com/git-for-windows/git/releases/download/v2.46.0.windows.1/Git-2.46.0-64-bit.exe"
set INSTALLER_FILE=%DOWNLOADS_DIR%\Git-2.46.0-64-bit.exe

:: Download installer
echo Downloading Git...
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%

:: Install Git
echo Installing Git...
"%INSTALLER_FILE%" /VERYSILENT /DIR="%DEPENDENCIES_DIR%\git"

:: Check if installation was successful. This has to come directly after the
:: installer: any command in between (such as the dir listing below) would
:: replace the installer's errorlevel with its own.
if %errorlevel% neq 0 (
    echo "Failed to install Git. (exitcode = %errorlevel%)"
    exit /b 1
)

:: List the install folder for the job log
dir %DEPENDENCIES_DIR%\git

:: Enable long paths
call "%DEPENDENCIES_DIR%\git\cmd\git.exe" config --system core.longpaths true

:: Add to PATH
echo %DEPENDENCIES_DIR%\git\cmd\;%DEPENDENCIES_DIR%\git\bin\>> %GITHUB_PATH%

echo Dependency Git installation finished.

View File

@ -1,33 +0,0 @@
@echo off
:: Clones libuv v1.39.0 into DEPENDENCIES_DIR, builds it with the arm64 MSVC
:: toolchain and installs it into libuv\install.
:: Reads DOWNLOADS_DIR and DEPENDENCIES_DIR.

echo Dependency libuv installation started.

:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%

:: activate visual studio
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
where cl.exe

cd %DEPENDENCIES_DIR%
:: The clone result is deliberately not checked so that an already existing
:: checkout can be reused; a missing source tree fails at the configure step.
git clone https://github.com/libuv/libuv.git -b v1.39.0

:: Every cmake step is checked on its own: previously only the final install
:: step was, so a failed configure or build could go unnoticed.
echo Configuring libuv...
mkdir libuv\build
cd libuv\build
cmake .. -DBUILD_TESTING=OFF
if %errorlevel% neq 0 goto fail

echo Building libuv...
cmake --build . --config Release
if %errorlevel% neq 0 goto fail

echo Installing libuv...
cmake --install . --prefix ../install
if %errorlevel% neq 0 goto fail

echo Dependency libuv installation finished.
exit /b 0

:fail
:: goto does not modify errorlevel, so it still holds the failing step's code.
echo "Failed to install libuv. (exitcode = %errorlevel%)"
exit /b 1

View File

@ -1,46 +0,0 @@
@echo off
:: Clones OpenBLAS v0.3.29 into DEPENDENCIES_DIR, builds it with Ninja and the
:: arm64 MSVC toolchain and installs it into OpenBLAS\install.
:: Reads DOWNLOADS_DIR and DEPENDENCIES_DIR.

echo Dependency OpenBLAS installation started.

:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%

:: activate visual studio
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
where cl.exe

:: Clone OpenBLAS
:: The clone result is deliberately not checked so that an already existing
:: checkout can be reused; a missing source tree fails at the configure step.
cd %DEPENDENCIES_DIR%
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29

:: Every cmake step is checked on its own: previously only the final install
:: step was, so a failed configure or build could go unnoticed.
echo Configuring OpenBLAS...
mkdir OpenBLAS\build
cd OpenBLAS\build
cmake .. -G Ninja ^
  -DBUILD_TESTING=0 ^
  -DBUILD_BENCHMARKS=0 ^
  -DC_LAPACK=1 ^
  -DNOFORTRAN=1 ^
  -DDYNAMIC_ARCH=0 ^
  -DARCH=arm64 ^
  -DBINARY=64 ^
  -DTARGET=GENERIC ^
  -DUSE_OPENMP=1 ^
  -DCMAKE_SYSTEM_PROCESSOR=ARM64 ^
  -DCMAKE_SYSTEM_NAME=Windows ^
  -DCMAKE_BUILD_TYPE=Release
if %errorlevel% neq 0 goto fail

echo Building OpenBLAS...
cmake --build . --config Release
if %errorlevel% neq 0 goto fail

echo Installing OpenBLAS...
cmake --install . --prefix ../install
if %errorlevel% neq 0 goto fail

echo Dependency OpenBLAS installation finished.
exit /b 0

:fail
:: goto does not modify errorlevel, so it still holds the failing step's code.
echo "Failed to install OpenBLAS. (exitcode = %errorlevel%)"
exit /b 1

View File

@ -1,41 +0,0 @@
@echo off
:: Downloads the arm64 Python installer selected by PYTHON_VERSION, installs it
:: silently into the Python folder under DEPENDENCIES_DIR, and appends the
:: install folders to the file named by GITHUB_PATH.
:: Expects DOWNLOADS_DIR, DEPENDENCIES_DIR and GITHUB_PATH to be set by the caller.
echo Dependency Python installation started.
:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
:: Pick the installer URL. Python312 and Python311 are recognised; any other
:: value, including an unset PYTHON_VERSION, falls back to the 3.12.7 installer.
if "%PYTHON_VERSION%"=="Python312" (
echo Python version is set to Python312
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
) else if "%PYTHON_VERSION%"=="Python311" (
echo Python version is set to Python311
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.11.9/python-3.11.9-arm64.exe"
) else (
echo PYTHON_VERSION not defined, Python version is set to Python312
set DOWNLOAD_URL="https://www.python.org/ftp/python/3.12.7/python-3.12.7-arm64.exe"
)
set INSTALLER_FILE=%DOWNLOADS_DIR%\python-installer.exe
:: Download installer
echo Downloading Python...
:: DOWNLOAD_URL already carries its own double quotes from the set lines above
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install Python
echo Installing Python...
"%INSTALLER_FILE%" /quiet Include_debug=1 TargetDir="%DEPENDENCIES_DIR%\Python"
:: Check if installation was successful.
:: Only the installer exit code is tested; the curl result is not checked.
if %errorlevel% neq 0 (
echo "Failed to install Python. (exitcode = %errorlevel%)"
exit /b 1
)
:: Add to PATH by appending the interpreter, scripts and libs folders to the
:: file named by GITHUB_PATH
echo %DEPENDENCIES_DIR%\Python\>> %GITHUB_PATH%
echo %DEPENDENCIES_DIR%\Python\scripts\>> %GITHUB_PATH%
echo %DEPENDENCIES_DIR%\Python\libs\>> %GITHUB_PATH%
echo Dependency Python installation finished.

View File

@ -1,33 +0,0 @@
@echo off
:: Downloads rustup-init and installs the stable Rust toolchain with the
:: aarch64-pc-windows-msvc default host. RUSTUP_HOME and CARGO_HOME are placed
:: under DEPENDENCIES_DIR and exported through the GITHUB_ENV file.
:: Expects DOWNLOADS_DIR, DEPENDENCIES_DIR, GITHUB_PATH and GITHUB_ENV to be set.
echo Dependency Rust installation started.
:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
:: NOTE(review): the bootstrap binary is the x86_64 build of rustup-init while
:: the installed toolchain host below is aarch64; presumably intentional,
:: confirm with the runner setup.
set DOWNLOAD_URL="https://static.rust-lang.org/rustup/dist/x86_64-pc-windows-msvc/rustup-init.exe"
set INSTALLER_FILE=%DOWNLOADS_DIR%\rustup-init.exe
:: rustup-init reads these two variables to decide where to install
set RUSTUP_HOME=%DEPENDENCIES_DIR%\rust
set CARGO_HOME=%DEPENDENCIES_DIR%\cargo
:: Download installer
echo Downloading Rust...
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
:: Install Rust
echo Installing Rust...
"%INSTALLER_FILE%" -q -y --default-host aarch64-pc-windows-msvc --default-toolchain stable --profile default
:: Check if installation was successful.
:: Only the installer exit code is tested; the curl result is not checked.
if %errorlevel% neq 0 (
echo "Failed to install Rust. (exitcode = %errorlevel%)"
exit /b 1
)
:: Add cargo binaries to PATH and publish both home folders to later steps
echo %DEPENDENCIES_DIR%\cargo\bin\>> %GITHUB_PATH%
echo RUSTUP_HOME=%DEPENDENCIES_DIR%\rust>> %GITHUB_ENV%
echo CARGO_HOME=%DEPENDENCIES_DIR%\cargo>> %GITHUB_ENV%
echo Dependency Rust installation finished.

View File

@ -1,33 +0,0 @@
@echo off
:: Downloads the sccache v0.8.1 release archive, unpacks it into
:: DEPENDENCIES_DIR, renames the unpacked folder to sccache and appends that
:: folder to the file named by GITHUB_PATH.
:: Expects DOWNLOADS_DIR, DEPENDENCIES_DIR and GITHUB_PATH to be set by the caller.
:: Exits with code 1 as soon as the download, extraction or rename fails.
echo Dependency sccache installation started.
:: Pre-check for downloads and dependencies folders
if not exist "%DOWNLOADS_DIR%" mkdir %DOWNLOADS_DIR%
if not exist "%DEPENDENCIES_DIR%" mkdir %DEPENDENCIES_DIR%
:: Set download URL for the sccache
set DOWNLOAD_URL="https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-pc-windows-msvc.zip"
set INSTALLER_FILE=%DOWNLOADS_DIR%\sccache.zip
:: Download installer
echo Downloading sccache.zip...
curl -L -o "%INSTALLER_FILE%" %DOWNLOAD_URL%
if %errorlevel% neq 0 goto :install_failed
:: Install sccache
echo Extracting sccache.zip...
tar -xf "%INSTALLER_FILE%" -C %DEPENDENCIES_DIR%
if %errorlevel% neq 0 goto :install_failed
cd %DEPENDENCIES_DIR%
ren sccache-v0.8.1-x86_64-pc-windows-msvc sccache
:: Each step is checked right after it runs. A single check placed after the
:: cd below would not reflect the result of the download, extract or rename.
if %errorlevel% neq 0 goto :install_failed
cd ..
:: Add to PATH
echo %DEPENDENCIES_DIR%\sccache\>> %GITHUB_PATH%
echo Dependency sccache installation finished.
exit /b 0
:install_failed
echo "Failed to install sccache. (exitcode = %errorlevel%)"
exit /b 1

View File

@ -1,22 +0,0 @@
:: Prepares a Python virtual environment inside the source tree and installs
:: the previously built torch wheel into it.
:: Expects PYTORCH_ROOT, DEPENDENCIES_DIR and PYTORCH_FINAL_PACKAGE_DIR to be set.
:: change to source directory
cd %PYTORCH_ROOT%
:: activate visual studio
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
where cl.exe
:: create virtual environment
python -m venv .venv
:: A gitignore holding a single star keeps the whole venv out of git status
echo * > .venv\.gitignore
call .\.venv\Scripts\activate
where python
:: install dependencies
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest numpy
:: find file name for pytorch wheel.
:: The loop assigns on every match, so the last directory entry containing
:: torch- wins when several are present.
for /f "delims=" %%f in ('dir /b "%PYTORCH_FINAL_PACKAGE_DIR%" ^| findstr "torch-"') do set "TORCH_WHEEL_FILENAME=%PYTORCH_FINAL_PACKAGE_DIR%\%%f"
pip install %TORCH_WHEEL_FILENAME%

View File

@ -1,101 +0,0 @@
@echo on
:: Builds LibTorch for Windows on arm64 and packages it as a zip archive.
:: Steps: set build variables, activate the arm64 Visual Studio environment,
:: create a venv, run tools/build_libtorch.py, collect the outputs under a
:: libtorch folder, zip that folder and copy the archive twice into
:: PYTORCH_FINAL_PACKAGE_DIR, once under its versioned name and once as latest.
:: Expects BUILD_TYPE, DEPENDENCIES_DIR, PYTORCH_ROOT, PYTORCH_FINAL_PACKAGE_DIR
:: and PYTORCH_BUILD_VERSION to be set; ENABLE_APL, ENABLE_OPENBLAS and DEBUG
:: are optional.
:: Exits with code 1 when the build, the archive step or a copy fails.
:: environment variables
set CMAKE_BUILD_TYPE=%BUILD_TYPE%
set CMAKE_C_COMPILER_LAUNCHER=sccache
set CMAKE_CXX_COMPILER_LAUNCHER=sccache
set libuv_ROOT=%DEPENDENCIES_DIR%\libuv\install
set MSSdk=1
if defined PYTORCH_BUILD_VERSION (
set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION%
set PYTORCH_BUILD_NUMBER=1
)
:: Set BLAS type.
:: The operands are quoted so that an unset ENABLE_APL or ENABLE_OPENBLAS
:: compares as an empty string instead of producing a syntax error.
if "%ENABLE_APL%" == "1" (
set BLAS=APL
set USE_LAPACK=1
) else if "%ENABLE_OPENBLAS%" == "1" (
set BLAS=OpenBLAS
set OpenBLAS_HOME=%DEPENDENCIES_DIR%\OpenBLAS\install
)
:: activate visual studio
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
where cl.exe
:: change to source directory
cd %PYTORCH_ROOT%
:: copy libuv.dll
copy %libuv_ROOT%\lib\Release\uv.dll torch\lib\uv.dll
:: create virtual environment
python -m venv .venv
echo * > .venv\.gitignore
call .\.venv\Scripts\activate
where python
:: python install dependencies
python -m pip install --upgrade pip
pip install -r requirements.txt
:: DISTUTILS_USE_SDK should be set after psutil dependency
set DISTUTILS_USE_SDK=1
:: start sccache server and reset sccache stats
sccache --start-server
sccache --zero-stats
sccache --show-stats
:: Prepare the environment
mkdir libtorch
mkdir libtorch\bin
mkdir libtorch\cmake
mkdir libtorch\include
mkdir libtorch\lib
mkdir libtorch\share
mkdir libtorch\test
:: Call LibTorch build script
python ./tools/build_libtorch.py
:: Check if there is an error, covering positive and negative exit codes
IF ERRORLEVEL 1 exit /b 1
IF NOT ERRORLEVEL 0 exit /b 1
:: Move the files to the correct location.
:: No exit code checks here: robocopy reports success with non-zero codes.
move /Y torch\bin\*.* libtorch\bin\
move /Y torch\cmake\*.* libtorch\cmake\
robocopy /move /e torch\include\ libtorch\include\
move /Y torch\lib\*.* libtorch\lib\
robocopy /move /e torch\share\ libtorch\share\
move /Y torch\test\*.* libtorch\test\
move /Y libtorch\bin\*.dll libtorch\lib\
:: Set version
echo %PYTORCH_BUILD_VERSION% > libtorch\build-version
git rev-parse HEAD > libtorch\build-hash
:: Set LIBTORCH_PREFIX
IF "%DEBUG%" == "" (
set LIBTORCH_PREFIX=libtorch-win-arm64-shared-with-deps
) ELSE (
set LIBTORCH_PREFIX=libtorch-win-arm64-shared-with-deps-debug
)
:: Create output
C:\Windows\System32\tar.exe -cvaf %LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip -C libtorch *
:: Fail right away when the archive could not be created; checking only after
:: the cleanup below would test the cleanup rather than the packaging.
if %errorlevel% neq 0 goto :package_failed
:: Copy output to target directory
if not exist ..\output mkdir ..\output
copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_DIR%\"
if %errorlevel% neq 0 goto :package_failed
copy /Y "%LIBTORCH_PREFIX%-%PYTORCH_BUILD_VERSION%.zip" "%PYTORCH_FINAL_PACKAGE_DIR%\%LIBTORCH_PREFIX%-latest.zip"
if %errorlevel% neq 0 goto :package_failed
:: Cleanup raw data to save space
rmdir /s /q libtorch
:: Check if installation was successful
if %errorlevel% neq 0 goto :package_failed
exit /b 0
:package_failed
echo "Failed on build_libtorch. (exitcode = %errorlevel%)"
exit /b 1

View File

@ -1,60 +0,0 @@
@echo on
:: Builds the PyTorch wheel for Windows on arm64.
:: Steps: set build variables, activate the arm64 Visual Studio environment,
:: create a venv, install requirements, then run setup.py bdist_wheel with the
:: wheel written to PYTORCH_FINAL_PACKAGE_DIR.
:: Expects BUILD_TYPE, DEPENDENCIES_DIR, PYTORCH_ROOT and
:: PYTORCH_FINAL_PACKAGE_DIR to be set; ENABLE_APL and ENABLE_OPENBLAS are
:: optional.
:: Exits with code 1 when the wheel build fails.
:: environment variables
set CMAKE_BUILD_TYPE=%BUILD_TYPE%
set CMAKE_C_COMPILER_LAUNCHER=sccache
set CMAKE_CXX_COMPILER_LAUNCHER=sccache
set libuv_ROOT=%DEPENDENCIES_DIR%\libuv\install
set MSSdk=1
if defined PYTORCH_BUILD_VERSION (
set PYTORCH_BUILD_VERSION=%PYTORCH_BUILD_VERSION%
set PYTORCH_BUILD_NUMBER=1
)
:: Set BLAS type.
:: The operands are quoted so that an unset ENABLE_APL or ENABLE_OPENBLAS
:: compares as an empty string instead of producing a syntax error.
if "%ENABLE_APL%" == "1" (
set BLAS=APL
set USE_LAPACK=1
) else if "%ENABLE_OPENBLAS%" == "1" (
set BLAS=OpenBLAS
set OpenBLAS_HOME=%DEPENDENCIES_DIR%\OpenBLAS\install
)
:: activate visual studio
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
where cl.exe
:: change to source directory
cd %PYTORCH_ROOT%
:: copy libuv.dll
copy %libuv_ROOT%\lib\Release\uv.dll torch\lib\uv.dll
:: create virtual environment
python -m venv .venv
echo * > .venv\.gitignore
call .\.venv\Scripts\activate
where python
:: python install dependencies
python -m pip install --upgrade pip
pip install -r requirements.txt
:: DISTUTILS_USE_SDK should be set after psutil dependency
set DISTUTILS_USE_SDK=1
:: start sccache server and reset sccache stats
sccache --start-server
sccache --zero-stats
sccache --show-stats
:: Call PyTorch build script
python setup.py bdist_wheel -d "%PYTORCH_FINAL_PACKAGE_DIR%"
:: Remember the build result now. The stats command below overwrites the
:: exit code, so testing it afterwards would hide a failed build.
set PYTORCH_WHEEL_BUILD_EXIT_CODE=%errorlevel%
:: show sccache stats
sccache --show-stats
:: Check if installation was successful
if %PYTORCH_WHEEL_BUILD_EXIT_CODE% neq 0 (
echo "Failed on build_pytorch. (exitcode = %PYTORCH_WHEEL_BUILD_EXIT_CODE%)"
exit /b 1
)

View File

@ -1,65 +0,0 @@
@echo off
:: Smoke test for the built artifacts, selected by PACKAGE_TYPE.
::   wheel    - installs every wheel found under PYTORCH_FINAL_PACKAGE_DIR,
::              imports torch and runs the RNN and CNN example scripts.
::   libtorch - unpacks the latest zip into tmp, compiles simple-torch-test.cpp
::              against it with cl and runs the resulting executable.
:: Any other PACKAGE_TYPE, or any failing step, exits with code 1.
:: Expects PYTORCH_FINAL_PACKAGE_DIR and PYTORCH_ROOT to be set; the libtorch
:: path also needs DEPENDENCIES_DIR.
setlocal
set "ORIG_PATH=%PATH%"
if "%PACKAGE_TYPE%" == "wheel" goto wheel
if "%PACKAGE_TYPE%" == "libtorch" goto libtorch
echo "unknown package type"
exit /b 1
:wheel
echo "install wheel package"
echo Running pip install...
pip install -q --pre numpy protobuf
echo Error level after pip install: %ERRORLEVEL%
if errorlevel 1 exit /b 1
:: Forward slashes in the package dir are converted to backslashes for where
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *.whl') do pip install "%%i"
if errorlevel 1 exit /b 1
goto smoke_test
:smoke_test
python -c "import torch"
if ERRORLEVEL 1 exit /b 1
echo Running python rnn_smoke.py...
python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\rnn_smoke_win_arm64.py
if errorlevel 1 exit /b 1
echo Checking that basic CNN works...
python %PYTORCH_ROOT%\.ci\pytorch\test_example_code\cnn_smoke_win_arm64.py
if errorlevel 1 exit /b 1
goto end
:libtorch
echo "install and test libtorch"
:: tar -C needs an existing target folder, so create tmp when it is missing
if not exist tmp mkdir tmp
for /F "delims=" %%i in ('where /R "%PYTORCH_FINAL_PACKAGE_DIR:/=\%" *-latest.zip') do tar -xf "%%i" -C tmp
if ERRORLEVEL 1 exit /b 1
pushd tmp\libtorch
set VC_VERSION_LOWER=14
set VC_VERSION_UPPER=36
call "%DEPENDENCIES_DIR%\VSBuildTools\VC\Auxiliary\Build\vcvarsall.bat" arm64
:: Point the compiler, linker and loader at the unpacked libtorch tree
set install_root=%CD%
set INCLUDE=%INCLUDE%;%install_root%\include;%install_root%\include\torch\csrc\api\include
set LIB=%LIB%;%install_root%\lib
set PATH=%PATH%;%install_root%\lib
cl %PYTORCH_ROOT%\.ci\pytorch\test_example_code\simple-torch-test.cpp c10.lib torch_cpu.lib /EHsc /std:c++17
if ERRORLEVEL 1 exit /b 1
.\simple-torch-test.exe
if ERRORLEVEL 1 exit /b 1
:end
:: Shared exit path for both package types: restore PATH and pop the folder
:: pushed by the libtorch branch
set "PATH=%ORIG_PATH%"
popd

View File

@ -1,133 +0,0 @@
@echo off
:: This script parses args, installs required libraries (miniconda, MKL,
:: Magma), and then delegates to cpu.bat, cuda80.bat, etc.
::
:: Inputs come either from the environment, when CUDA_VERSION,
:: PYTORCH_BUILD_VERSION and PYTORCH_BUILD_NUMBER are all set, or from exactly
:: three positional arguments in that order.
:: DESIRED_PYTHON may hold several versions separated by semicolons; one build
:: is run per version.
:: Exits with code 1 on bad arguments, a failed Miniconda install, a failed
:: conda environment setup or a failed arch build script.
if not "%CUDA_VERSION%" == "" if not "%PYTORCH_BUILD_VERSION%" == "" if not "%PYTORCH_BUILD_NUMBER%" == "" goto env_end
if "%~1"=="" goto arg_error
if "%~2"=="" goto arg_error
if "%~3"=="" goto arg_error
if not "%~4"=="" goto arg_error
goto arg_end
:arg_error
echo Illegal number of parameters. Pass cuda version, pytorch version, build number
echo CUDA version should be Mm with no dot, e.g. '80'
echo DESIRED_PYTHON should be M.m, e.g. '2.7'
exit /b 1
:arg_end
set CUDA_VERSION=%~1
set PYTORCH_BUILD_VERSION=%~2
set PYTORCH_BUILD_NUMBER=%~3
:env_end
:: CUDA_PREFIX names both the arch build script and the MAGMA archive
set CUDA_PREFIX=cuda%CUDA_VERSION%
if "%CUDA_VERSION%" == "cpu" set CUDA_PREFIX=cpu
if "%CUDA_VERSION%" == "xpu" set CUDA_PREFIX=xpu
if "%DESIRED_PYTHON%" == "" set DESIRED_PYTHON=3.5;3.6;3.7
:: Turn a list such as 3.5;3.6 into py35;py36, the conda environment names
set DESIRED_PYTHON_PREFIX=%DESIRED_PYTHON:.=%
set DESIRED_PYTHON_PREFIX=py%DESIRED_PYTHON_PREFIX:;=;py%
set SRC_DIR=%~dp0
pushd %SRC_DIR%
:: Install Miniconda3
set "CONDA_HOME=%CD%\conda"
set "tmp_conda=%CONDA_HOME%"
set "miniconda_exe=%CD%\miniconda.exe"
rmdir /s /q conda
del miniconda.exe
curl --retry 3 -k https://repo.anaconda.com/miniconda/Miniconda3-py311_23.9.0-0-Windows-x86_64.exe -o "%miniconda_exe%"
start /wait "" "%miniconda_exe%" /S /InstallationType=JustMe /RegisterPython=0 /AddToPath=0 /D=%tmp_conda%
if ERRORLEVEL 1 exit /b 1
set "ORIG_PATH=%PATH%"
set "PATH=%CONDA_HOME%;%CONDA_HOME%\scripts;%CONDA_HOME%\Library\bin;%PATH%"
:: create a new conda environment and install packages, retrying up to three
:: times before giving up
:try
SET /A tries=3
:loop
IF %tries% LEQ 0 GOTO :exception
call condaenv.bat
IF %ERRORLEVEL% EQU 0 GOTO :done
SET /A "tries=%tries%-1"
:: Jump back for the next attempt. Without this jump the script would fall
:: straight into the failure path after the first unsuccessful attempt.
GOTO :loop
:exception
echo "Failed to create conda env"
exit /B 1
:done
:: Download MAGMA Files on CUDA builds
set MAGMA_VERSION=2.5.4
if "%DEBUG%" == "1" (
set BUILD_TYPE=debug
) else (
set BUILD_TYPE=release
)
if not "%CUDA_VERSION%" == "cpu" if not "%CUDA_VERSION%" == "xpu" (
rmdir /s /q magma_%CUDA_PREFIX%_%BUILD_TYPE%
del magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z
curl -k https://s3.amazonaws.com/ossci-windows/magma_%MAGMA_VERSION%_%CUDA_PREFIX%_%BUILD_TYPE%.7z -o magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z
7z x -aoa magma_%CUDA_PREFIX%_%BUILD_TYPE%.7z -omagma_%CUDA_PREFIX%_%BUILD_TYPE%
)
:: Install sccache
if "%USE_SCCACHE%" == "1" (
mkdir %CD%\tmp_bin
curl -k https://s3.amazonaws.com/ossci-windows/sccache.exe --output %CD%\tmp_bin\sccache.exe
curl -k https://s3.amazonaws.com/ossci-windows/sccache-cl.exe --output %CD%\tmp_bin\sccache-cl.exe
if not "%CUDA_VERSION%" == "" (
set ADDITIONAL_PATH=%CD%\tmp_bin
set SCCACHE_IDLE_TIMEOUT=1500
REM randomtemp is used to resolve the intermittent build error related to CUDA.
REM code: https://github.com/peterjc123/randomtemp-rust
REM issue: https://github.com/pytorch/pytorch/issues/25393
REM
REM CMake requires a single command as CUDA_NVCC_EXECUTABLE, so we push the wrappers
REM randomtemp.exe and sccache.exe into a batch file which CMake invokes.
curl -kL https://github.com/peterjc123/randomtemp-rust/releases/download/v0.4/randomtemp.exe --output %SRC_DIR%\tmp_bin\randomtemp.exe
echo @"%SRC_DIR%\tmp_bin\randomtemp.exe" "%SRC_DIR%\tmp_bin\sccache.exe" "%CUDA_PATH%\bin\nvcc.exe" %%* > "%SRC_DIR%/tmp_bin/nvcc.bat"
cat %SRC_DIR%/tmp_bin/nvcc.bat
set CUDA_NVCC_EXECUTABLE=%SRC_DIR%/tmp_bin/nvcc.bat
REM CMake doesn't accept back-slashes in the path
for /F "usebackq delims=" %%n in (`cygpath -m "%CUDA_PATH%\bin\nvcc.exe"`) do set CMAKE_CUDA_COMPILER=%%n
set CMAKE_CUDA_COMPILER_LAUNCHER=%SRC_DIR%\tmp_bin\randomtemp.exe;%SRC_DIR%\tmp_bin\sccache.exe
)
)
set PYTORCH_BINARY_BUILD=1
set TH_BINARY_BUILD=1
set INSTALL_TEST=0
:: Run one build per requested Python environment
for %%v in (%DESIRED_PYTHON_PREFIX%) do (
REM Activate Python Environment
set PYTHON_PREFIX=%%v
set "CONDA_LIB_PATH=%CONDA_HOME%\envs\%%v\Library\bin"
if not "%ADDITIONAL_PATH%" == "" (
set "PATH=%ADDITIONAL_PATH%;%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%"
) else (
set "PATH=%CONDA_HOME%\envs\%%v;%CONDA_HOME%\envs\%%v\scripts;%CONDA_HOME%\envs\%%v\Library\bin;%ORIG_PATH%"
)
pip install ninja
@setlocal
REM Set Flags
if not "%CUDA_VERSION%"=="cpu" if not "%CUDA_VERSION%" == "xpu" (
set MAGMA_HOME=%cd%\magma_%CUDA_PREFIX%_%BUILD_TYPE%
)
echo "Calling arch build script"
call %CUDA_PREFIX%.bat
if ERRORLEVEL 1 exit /b 1
@endlocal
)
set "PATH=%ORIG_PATH%"
popd
if ERRORLEVEL 1 exit /b 1

View File

@ -1,27 +0,0 @@
:: Creates one conda environment per requested Python version, installs the
:: pip packages the build needs into each of them, and installs libuv into
:: the base conda installation.
:: DESIRED_PYTHON is iterated with FOR, so it may hold several versions.
:: Expects CONDA_HOME to be set and conda to be reachable through PATH.
:: Sets libuv_ROOT for the caller.
IF "%DESIRED_PYTHON%"=="" (
echo DESIRED_PYTHON is NOT defined.
exit /b 1
)
:: Create a new conda environment.
:: Delayed expansion is needed because PYTHON_VERSION_STR changes inside the loop.
setlocal EnableDelayedExpansion
FOR %%v IN (%DESIRED_PYTHON%) DO (
set PYTHON_VERSION_STR=%%v
set PYTHON_VERSION_STR=!PYTHON_VERSION_STR:.=!
REM Drop any stale environment first. The rmdir fallback runs with /q so that
REM it never waits for a confirmation prompt in a non-interactive job.
conda remove -n py!PYTHON_VERSION_STR! --all -y || rmdir %CONDA_HOME%\envs\py!PYTHON_VERSION_STR! /s /q
if "%%v" == "3.9" call conda create -n py!PYTHON_VERSION_STR! -y numpy=2.0.1 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v
if "%%v" == "3.10" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v
if "%%v" == "3.11" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v
if "%%v" == "3.12" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.0.1 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v
if "%%v" == "3.13" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.1.2 boto3 cmake ninja typing_extensions setuptools=72.1.0 python=%%v
if "%%v" == "3.13t" call conda create -n py!PYTHON_VERSION_STR! -y -c=conda-forge numpy=2.1.2 boto3 cmake ninja typing_extensions setuptools=72.1.0 python-freethreading python=3.13
call conda run -n py!PYTHON_VERSION_STR! pip install pyyaml
call conda run -n py!PYTHON_VERSION_STR! pip install mkl-include
call conda run -n py!PYTHON_VERSION_STR! pip install mkl-static
)
endlocal
:: Install libuv
conda install -y -q -c conda-forge libuv=1.39
:: Set after endlocal so the value stays visible to the calling script
set libuv_ROOT=%CONDA_HOME%\Library
echo libuv_ROOT=%libuv_ROOT%

View File

@ -1,30 +0,0 @@
@echo off
:: CPU-only build driver: prepares the source tree, checks the toolchain,
:: disables CUDA and then runs the copy_cpu and setup helper scripts.
:: Each helper is followed by an ERRORLEVEL test that stops this script early.
set MODULE_NAME=pytorch
:: The ELSE pairs with the second IF, so: with setup.py present neither helper
:: runs; without setup.py the tree is cloned when the module folder is missing
:: and cleaned when it exists.
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
call internal\clone.bat
cd %~dp0
) ELSE (
call internal\clean.bat
)
IF ERRORLEVEL 1 goto :eof
call internal\check_deps.bat
IF ERRORLEVEL 1 goto :eof
REM Check for optional components
echo Disabling CUDA
set USE_CUDA=0
call internal\check_opts.bat
IF ERRORLEVEL 1 goto :eof
:: When a nightlies checkout exists, continue from its parent folder
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
call %~dp0\internal\copy_cpu.bat
IF ERRORLEVEL 1 goto :eof
call %~dp0\internal\setup.bat
IF ERRORLEVEL 1 goto :eof

View File

@ -1,59 +0,0 @@
@echo off
:: CUDA 11.8 build driver: prepares the source tree, checks the toolchain,
:: locates NVTX and the CUDA 11.8 toolkit, selects the GPU architectures and
:: then runs the copy and setup helper scripts.
:: Each helper is followed by an ERRORLEVEL test that stops this script early.
set MODULE_NAME=pytorch
:: The ELSE pairs with the second IF, so: with setup.py present neither helper
:: runs; without setup.py the tree is cloned when the module folder is missing
:: and cleaned when it exists.
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
call internal\clone.bat
cd %~dp0
) ELSE (
call internal\clean.bat
)
IF ERRORLEVEL 1 goto :eof
call internal\check_deps.bat
IF ERRORLEVEL 1 goto :eof
REM Check for optional components
:: USE_CUDA is cleared rather than set to a value here
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
:: Use the default NVTX install location when NVTOOLSEXT_PATH is not provided
IF "%NVTOOLSEXT_PATH%"=="" (
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
) ELSE (
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
exit /b 1
)
)
:: Use the default toolkit install location when CUDA_PATH_V118 is not provided
IF "%CUDA_PATH_V118%"=="" (
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin\nvcc.exe" (
set "CUDA_PATH_V118=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8"
) ELSE (
echo CUDA 11.8 not found, failing
exit /b 1
)
)
:: Architecture selection: TORCH_CUDA_ARCH_LIST and TORCH_NVCC_FLAGS when
:: BUILD_VISION is empty, explicit NVCC_FLAGS gencode switches otherwise.
:: NOTE(review): the two branches do not list the same oldest architecture,
:: 3.7 in one and compute_35 in the other; confirm this is intended.
IF "%BUILD_VISION%" == "" (
set TORCH_CUDA_ARCH_LIST=3.7+PTX;5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_35,code=sm_35 -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
)
:: Make the selected toolkit the active one
set "CUDA_PATH=%CUDA_PATH_V118%"
set "PATH=%CUDA_PATH_V118%\bin;%PATH%"
:optcheck
call internal\check_opts.bat
IF ERRORLEVEL 1 goto :eof
:: When a nightlies checkout exists, continue from its parent folder
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
call %~dp0\internal\copy.bat
IF ERRORLEVEL 1 goto :eof
call %~dp0\internal\setup.bat
IF ERRORLEVEL 1 goto :eof

View File

@ -1,59 +0,0 @@
@echo off
:: CUDA 12.4 build driver: prepares the source tree, checks the toolchain,
:: locates NVTX and the CUDA 12.4 toolkit, selects the GPU architectures and
:: then runs the copy and setup helper scripts.
:: Each helper is followed by an ERRORLEVEL test that stops this script early.
set MODULE_NAME=pytorch
:: The ELSE pairs with the second IF, so: with setup.py present neither helper
:: runs; without setup.py the tree is cloned when the module folder is missing
:: and cleaned when it exists.
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
call internal\clone.bat
cd %~dp0
) ELSE (
call internal\clean.bat
)
IF ERRORLEVEL 1 goto :eof
call internal\check_deps.bat
IF ERRORLEVEL 1 goto :eof
REM Check for optional components
:: USE_CUDA is cleared rather than set to a value here
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
:: Use the default NVTX install location when NVTOOLSEXT_PATH is not provided
IF "%NVTOOLSEXT_PATH%"=="" (
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
) ELSE (
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
exit /b 1
)
)
:: Use the default toolkit install location when CUDA_PATH_V124 is not provided
IF "%CUDA_PATH_V124%"=="" (
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin\nvcc.exe" (
set "CUDA_PATH_V124=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
) ELSE (
echo CUDA 12.4 not found, failing
exit /b 1
)
)
:: Architecture selection: TORCH_CUDA_ARCH_LIST and TORCH_NVCC_FLAGS when
:: BUILD_VISION is empty, explicit NVCC_FLAGS gencode switches otherwise
IF "%BUILD_VISION%" == "" (
set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
)
:: Make the selected toolkit the active one
set "CUDA_PATH=%CUDA_PATH_V124%"
set "PATH=%CUDA_PATH_V124%\bin;%PATH%"
:optcheck
call internal\check_opts.bat
IF ERRORLEVEL 1 goto :eof
:: When a nightlies checkout exists, continue from its parent folder
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
call %~dp0\internal\copy.bat
IF ERRORLEVEL 1 goto :eof
call %~dp0\internal\setup.bat
IF ERRORLEVEL 1 goto :eof

View File

@ -1,59 +0,0 @@
@echo off
:: CUDA 12.6 build driver: prepares the source tree, checks the toolchain,
:: locates NVTX and the CUDA 12.6 toolkit, selects the GPU architectures and
:: then runs the copy and setup helper scripts.
:: Each helper is followed by an ERRORLEVEL test that stops this script early.
set MODULE_NAME=pytorch
:: The ELSE pairs with the second IF, so: with setup.py present neither helper
:: runs; without setup.py the tree is cloned when the module folder is missing
:: and cleaned when it exists.
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
call internal\clone.bat
cd %~dp0
) ELSE (
call internal\clean.bat
)
IF ERRORLEVEL 1 goto :eof
call internal\check_deps.bat
IF ERRORLEVEL 1 goto :eof
REM Check for optional components
:: USE_CUDA is cleared rather than set to a value here
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
:: Use the default NVTX install location when NVTOOLSEXT_PATH is not provided
IF "%NVTOOLSEXT_PATH%"=="" (
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
) ELSE (
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
exit /b 1
)
)
:: Use the default toolkit install location when CUDA_PATH_V126 is not provided
IF "%CUDA_PATH_V126%"=="" (
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6\bin\nvcc.exe" (
set "CUDA_PATH_V126=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6"
) ELSE (
echo CUDA 12.6 not found, failing
exit /b 1
)
)
:: Architecture selection: TORCH_CUDA_ARCH_LIST and TORCH_NVCC_FLAGS when
:: BUILD_VISION is empty, explicit NVCC_FLAGS gencode switches otherwise
IF "%BUILD_VISION%" == "" (
set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
)
:: Make the selected toolkit the active one
set "CUDA_PATH=%CUDA_PATH_V126%"
set "PATH=%CUDA_PATH_V126%\bin;%PATH%"
:optcheck
call internal\check_opts.bat
IF ERRORLEVEL 1 goto :eof
:: When a nightlies checkout exists, continue from its parent folder
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
call %~dp0\internal\copy.bat
IF ERRORLEVEL 1 goto :eof
call %~dp0\internal\setup.bat
IF ERRORLEVEL 1 goto :eof

View File

@ -1,59 +0,0 @@
@echo off
:: CUDA 12.8 build driver: prepares the source tree, checks the toolchain,
:: locates NVTX and the CUDA 12.8 toolkit, selects the GPU architectures and
:: then runs the copy and setup helper scripts.
:: Each helper is followed by an ERRORLEVEL test that stops this script early.
set MODULE_NAME=pytorch
:: The ELSE pairs with the second IF, so: with setup.py present neither helper
:: runs; without setup.py the tree is cloned when the module folder is missing
:: and cleaned when it exists.
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
call internal\clone.bat
cd %~dp0
) ELSE (
call internal\clean.bat
)
IF ERRORLEVEL 1 goto :eof
call internal\check_deps.bat
IF ERRORLEVEL 1 goto :eof
REM Check for optional components
:: USE_CUDA is cleared rather than set to a value here
set USE_CUDA=
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
:: Use the default NVTX install location when NVTOOLSEXT_PATH is not provided
IF "%NVTOOLSEXT_PATH%"=="" (
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
) ELSE (
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
exit /b 1
)
)
:: Use the default toolkit install location when CUDA_PATH_V128 is not provided
IF "%CUDA_PATH_V128%"=="" (
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8\bin\nvcc.exe" (
set "CUDA_PATH_V128=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8"
) ELSE (
echo CUDA 12.8 not found, failing
exit /b 1
)
)
:: Architecture selection: TORCH_CUDA_ARCH_LIST and TORCH_NVCC_FLAGS when
:: BUILD_VISION is empty, explicit NVCC_FLAGS gencode switches otherwise.
:: This toolkit version additionally lists the 10.0 and 12.0 architectures.
IF "%BUILD_VISION%" == "" (
set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
) ELSE (
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
)
:: Make the selected toolkit the active one
set "CUDA_PATH=%CUDA_PATH_V128%"
set "PATH=%CUDA_PATH_V128%\bin;%PATH%"
:optcheck
call internal\check_opts.bat
IF ERRORLEVEL 1 goto :eof
:: When a nightlies checkout exists, continue from its parent folder
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
call %~dp0\internal\copy.bat
IF ERRORLEVEL 1 goto :eof
call %~dp0\internal\setup.bat
IF ERRORLEVEL 1 goto :eof

View File

@ -1,9 +0,0 @@
@echo off
:: Downloads the 7-Zip 18.05 x64 installer into the current folder, runs it
:: silently and puts the 7-Zip folder under ProgramFiles at the front of PATH.
:: NOTE(review): curl runs with -k, which turns off TLS certificate checks.
curl -k https://www.7-zip.org/a/7z1805-x64.exe -O
if errorlevel 1 exit /b 1
:: start /wait blocks until the installer process has finished
start /wait 7z1805-x64.exe /S
if errorlevel 1 exit /b 1
set "PATH=%ProgramFiles%\7-Zip;%PATH%"

View File

@ -1,11 +0,0 @@
:: Top-level driver: runs the Visual C++, CUDA and XPU install helpers in that
:: order and then starts build_pytorch.bat with the CUDA version, the PyTorch
:: build version and the build number. Stops with exit code 1 at the first
:: helper that reports an error.
call windows/internal/vc_install_helper.bat
if errorlevel 1 exit /b 1
call windows/internal/cuda_install.bat
if errorlevel 1 exit /b 1
call windows/internal/xpu_install.bat
if errorlevel 1 exit /b 1
call windows/build_pytorch.bat %CUDA_VERSION% %PYTORCH_BUILD_VERSION% %PYTORCH_BUILD_NUMBER%
if errorlevel 1 exit /b 1

View File

@ -1,80 +0,0 @@
@echo off
REM Check for necessary components
:: Verifies the build prerequisites and exits with code 1 when one is missing:
:: an AMD64 host, CMake unless BUILD_VISION is set, vswhere plus a matching
:: Visual Studio install with vcvarsall.bat, and a 64-bit Python on PATH.
:: Sets VS15INSTALLDIR, VS15VCVARSALL, MSSdk, DISTUTILS_USE_SDK, PYVER and
:: PYSIZE for the scripts that run afterwards.
echo "Checking dependencies"
IF NOT "%PROCESSOR_ARCHITECTURE%"=="AMD64" (
echo You should use 64 bits Windows to build and run PyTorch
exit /b 1
)
echo "Cmake check"
:: CMake is only required when BUILD_VISION is empty
IF "%BUILD_VISION%" == "" (
where /q cmake.exe
IF ERRORLEVEL 1 (
echo CMake is required to compile PyTorch on Windows
exit /b 1
)
)
if not exist "%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" (
echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows
exit /b 1
)
:: Version range handed to vswhere below: 17 up to 18 by default, 16 up to 17
:: when VC_YEAR is 2019
set VC_VERSION_LOWER=17
set VC_VERSION_UPPER=18
if "%VC_YEAR%" == "2019" (
set VC_VERSION_LOWER=16
set VC_VERSION_UPPER=17
)
:: A caller-provided VS15INSTALLDIR that contains vcvarsall.bat takes
:: precedence over the vswhere lookup
if NOT "%VS15INSTALLDIR%" == "" if exist "%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat" (
set "VS15VCVARSALL=%VS15INSTALLDIR%\VC\Auxiliary\Build\vcvarsall.bat"
goto vswhere
)
:: Otherwise take the first vswhere result that contains vcvarsall.bat
for /f "usebackq tokens=*" %%i in (`"%ProgramFiles(x86)%\Microsoft Visual Studio\Installer\vswhere.exe" -legacy -products * -version [%VC_VERSION_LOWER%^,%VC_VERSION_UPPER%^) -property installationPath`) do (
if exist "%%i" if exist "%%i\VC\Auxiliary\Build\vcvarsall.bat" (
set "VS15INSTALLDIR=%%i"
set "VS15VCVARSALL=%%i\VC\Auxiliary\Build\vcvarsall.bat"
goto vswhere
)
)
:vswhere
:: Reached from both lookups above and also when neither found anything
IF "%VS15VCVARSALL%"=="" (
echo Visual Studio %VC_YEAR% C++ BuildTools is required to compile PyTorch on Windows
exit /b 1
)
set MSSdk=1
set DISTUTILS_USE_SDK=1
where /q python.exe
IF ERRORLEVEL 1 (
echo Python x64 3.5 or up is required to compile PyTorch on Windows
exit /b 1
)
:: PYVER holds the major and minor version digits joined together, e.g. 311
for /F "usebackq delims=" %%i in (`python -c "import sys; print('{0[0]}{0[1]}'.format(sys.version_info))"`) do (
set /a PYVER=%%i
)
:: Versions below 3.5 only trigger a warning; the script keeps going
if %PYVER% LSS 35 (
echo Warning: PyTorch for Python 2 under Windows is experimental.
echo Python x64 3.5 or up is recommended to compile PyTorch on Windows
echo Maybe you can create a virual environment if you have conda installed:
echo ^> conda create -n test python=3.6 pyyaml numpy
echo ^> activate test
)
:: PYSIZE is the pointer width of the interpreter in bits
for /F "usebackq delims=" %%i in (`python -c "import struct;print( 8 * struct.calcsize('P'))"`) do (
set /a PYSIZE=%%i
)
if %PYSIZE% NEQ 64 (
echo Python x64 3.5 or up is required to compile PyTorch on Windows
exit /b 1
)

View File

@ -1,47 +0,0 @@
@echo off
REM Check for optional components
:: Detects optional build accelerators and configures the environment for the
:: ones it finds: Ninja as the CMake generator, a compiler cache as CC and CXX,
:: and the MKL library folders on LIB. Always exits with code 0.
where /q ninja.exe
IF NOT ERRORLEVEL 1 (
echo Ninja found, using it to speed up builds
set CMAKE_GENERATOR=Ninja
)
:: Compiler cache detection is skipped entirely when USE_SCCACHE is 0.
:: The checks run in order and later matches overwrite earlier ones, so the
:: effective priority is sccache-cl, then sccache, then clcache.
IF "%USE_SCCACHE%" == "0" goto sccache_end
where /q clcache.exe
IF NOT ERRORLEVEL 1 (
echo clcache found, using it to speed up builds
set CC=clcache
set CXX=clcache
)
where /q sccache-cl.exe
IF NOT ERRORLEVEL 1 (
echo sccache-cl found, using it to speed up builds
set CC=sccache-cl
set CXX=sccache-cl
)
:: Skip the plain sccache lookup once sccache-cl has been selected
IF "%CC%" == "sccache-cl" IF "%CXX%" == "sccache-cl" goto sccache_end
where /q sccache.exe
IF NOT ERRORLEVEL 1 (
echo sccache found, using it to speed up builds
set CC=sccache cl
set CXX=sccache cl
)
:sccache_end
:: Prepend the MKL and compiler library folders when MKLProductDir has them
IF exist "%MKLProductDir%\mkl\lib\intel64_win" (
echo MKL found, adding it to build
set "LIB=%MKLProductDir%\mkl\lib\intel64_win;%MKLProductDir%\compiler\lib\intel64_win;%LIB%";
)
exit /b 0

View File

@ -1,5 +0,0 @@
@echo off
:: Runs the setup.py clean command inside the folder named by MODULE_NAME and
:: then returns to the parent folder.
cd %MODULE_NAME%
python setup.py clean
cd ..

View File

@ -1,51 +0,0 @@
@echo off
:: Obtains the source tree and its submodules.
:: With an existing NIGHTLIES_PYTORCH_ROOT the script only changes into it and
:: updates submodules. Otherwise it clones PYTORCH_REPO/MODULE_NAME from GitHub
:: and checks out PYTORCH_BRANCH, where the special value latest is resolved to
:: the last commit before the nightly date.
:: Exits with code 1 when the submodule update fails.
:: The conda and wheels jobs are separated on Windows, so we don't need to clone again.
if not exist "%NIGHTLIES_PYTORCH_ROOT%" goto clone_pytorch
echo "Changing to NIGHTLIES_PYTORCH_ROOT"
cd "%NIGHTLIES_PYTORCH_ROOT%"
goto submodule
:clone_pytorch
git clone https://github.com/%PYTORCH_REPO%/%MODULE_NAME%
cd %MODULE_NAME%
IF NOT "%PYTORCH_BRANCH%" == "latest" goto latest_end
:latest_start
:: A caller-provided NIGHTLIES_DATE skips the date computation
if NOT "%NIGHTLIES_DATE%" == "" goto date_end
:date_start
:: Compute the current date in Pacific Standard Time through PowerShell, once
:: with underscores and once in compact form
set "DATE_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyy_MM_dd'"
set "DATE_COMPACT_CMD=Get-Date ([System.TimeZoneInfo]::ConvertTimeFromUtc((Get-Date).ToUniversalTime(), [System.TimeZoneInfo]::FindSystemTimeZoneById('Pacific Standard Time'))) -f 'yyyyMMdd'"
FOR /F "delims=" %%i IN ('powershell -c "%DATE_CMD%"') DO set NIGHTLIES_DATE=%%i
FOR /F "delims=" %%i IN ('powershell -c "%DATE_COMPACT_CMD%"') DO set NIGHTLIES_DATE_COMPACT=%%i
:date_end
:: Derive the compact date by slicing year, month and day out of NIGHTLIES_DATE
:: when it was not computed above
if "%NIGHTLIES_DATE_COMPACT%" == "" set NIGHTLIES_DATE_COMPACT=%NIGHTLIES_DATE:~0,4%%NIGHTLIES_DATE:~5,2%%NIGHTLIES_DATE:~8,2%
:: Switch to the latest commit by 11:59 yesterday
echo PYTORCH_BRANCH is set to latest so I will find the last commit
echo before 0:00 midnight on %NIGHTLIES_DATE%
:: git expects dashes, so the underscores in the date are replaced
set git_date=%NIGHTLIES_DATE:_=-%
FOR /F "delims=" %%i IN ('git log --before %git_date% -n 1 "--pretty=%%H"') DO set last_commit=%%i
echo Setting PYTORCH_BRANCH to %last_commit% since that was the last
echo commit before %NIGHTLIES_DATE%
set PYTORCH_BRANCH=%last_commit%
:latest_end
:: Fall back to the release tag name built from PYTORCH_BUILD_VERSION
IF "%PYTORCH_BRANCH%" == "" set PYTORCH_BRANCH=v%PYTORCH_BUILD_VERSION%
git checkout %PYTORCH_BRANCH%
:: Retry as an explicit tag reference when the plain checkout failed
IF ERRORLEVEL 1 git checkout tags/%PYTORCH_BRANCH%
:submodule
git submodule update --init --recursive
IF ERRORLEVEL 1 exit /b 1

View File

@ -1,26 +0,0 @@
:: Copies the runtime DLLs that a CUDA build needs into pytorch\torch\lib:
:: the CUDA libraries from CUDA_PATH, CUPTI, NVTX, the OpenMP runtime from
:: CONDA_LIB_PATH and libuv. None of the copy results are checked.
copy "%CUDA_PATH%\bin\cusparse*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cublas*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cudart*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\curand*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cufft*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
:: NOTE(review): the NVTX path is hard-coded here rather than taken from
:: NVTOOLSEXT_PATH; confirm this is intended.
copy "C:\Program Files\NVIDIA Corporation\NvToolsExt\bin\x64\nvToolsExt64_1.dll*" pytorch\torch\lib
copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib
:: libuv_ROOT should be set in build_pytorch.bat
copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib
:: Copy zlib if it exists in Windows\System32
if exist "C:\Windows\System32\zlibwapi.dll" (
copy "C:\Windows\System32\zlibwapi.dll" pytorch\torch\lib
)
:: Copy the nvJitLink DLL, which is required for CUDA 12 and later
if exist "%CUDA_PATH%\bin\nvJitLink_*.dll*" (
copy "%CUDA_PATH%\bin\nvJitLink_*.dll*" pytorch\torch\lib
)

View File

@ -1,3 +0,0 @@
:: Copies the runtime DLLs that a CPU build needs into pytorch\torch\lib: the
:: OpenMP runtime from CONDA_LIB_PATH and libuv. The copy results are not checked.
copy "%CONDA_LIB_PATH%\libiomp*5md.dll" pytorch\torch\lib
:: libuv_ROOT should be set in build_pytorch.bat
copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib

View File

@ -1,9 +0,0 @@
:: Downloads the NVIDIA data center driver installer for version WIN_DRIVER_VN
:: from the ossci-windows bucket, runs it silently without a reboot and then
:: deletes the installer. Exits with code 1 when the download or install fails.
set WIN_DRIVER_VN=528.89
set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe"
:: NOTE(review): curl runs with -k, which turns off TLS certificate checks.
curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe
if errorlevel 1 exit /b 1
:: start /wait blocks until the installer process has finished
start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot
if errorlevel 1 exit /b 1
:: The fallback ver command keeps a failed delete from leaving an error code
del %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL

Some files were not shown because too many files have changed in this diff Show More