mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-29 11:14:56 +08:00
Compare commits
1 Commits
dev/joona/
...
bisect_per
| Author | SHA1 | Date | |
|---|---|---|---|
| 8c1bc4f522 |
@ -3,15 +3,22 @@ set -eux -o pipefail
|
||||
|
||||
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||
|
||||
if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="9.0"
|
||||
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="9.0;10.0;12.0"
|
||||
fi
|
||||
|
||||
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
|
||||
source $SCRIPTPATH/aarch64_ci_setup.sh
|
||||
|
||||
tagged_version() {
|
||||
GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
|
||||
if ${GIT_DESCRIBE} --exact >/dev/null; then
|
||||
${GIT_DESCRIBE}
|
||||
else
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
if tagged_version >/dev/null; then
|
||||
export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')"
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Run aarch64 builder python
|
||||
###############################################################################
|
||||
@ -20,7 +27,7 @@ cd /
|
||||
# on the mounted pytorch repo
|
||||
git config --global --add safe.directory /pytorch
|
||||
pip install -r /pytorch/requirements.txt
|
||||
pip install auditwheel==6.2.0
|
||||
pip install auditwheel
|
||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||
|
||||
@ -5,14 +5,16 @@ set -eux -o pipefail
|
||||
# By creating symlinks from desired /opt/python to /usr/local/bin/
|
||||
|
||||
NUMPY_VERSION=2.0.2
|
||||
if [[ "$DESIRED_PYTHON" == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then
|
||||
PYGIT2_VERSION=1.15.1
|
||||
if [[ "$DESIRED_PYTHON" == "3.13" ]]; then
|
||||
NUMPY_VERSION=2.1.2
|
||||
PYGIT2_VERSION=1.16.0
|
||||
fi
|
||||
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
source $SCRIPTPATH/../manywheel/set_desired_python.sh
|
||||
|
||||
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2
|
||||
pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION}
|
||||
|
||||
for tool in python python3 pip pip3 ninja scons patchelf; do
|
||||
ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;
|
||||
|
||||
@ -4,9 +4,12 @@
|
||||
import os
|
||||
import shutil
|
||||
from subprocess import check_call, check_output
|
||||
from typing import List
|
||||
|
||||
from pygit2 import Repository
|
||||
|
||||
|
||||
def list_dir(path: str) -> list[str]:
|
||||
def list_dir(path: str) -> List[str]:
|
||||
"""'
|
||||
Helper for getting paths for Python
|
||||
"""
|
||||
@ -39,7 +42,7 @@ def build_ArmComputeLibrary() -> None:
|
||||
"clone",
|
||||
"https://github.com/ARM-software/ComputeLibrary.git",
|
||||
"-b",
|
||||
"v25.02",
|
||||
"v24.09",
|
||||
"--depth",
|
||||
"1",
|
||||
"--shallow-submodules",
|
||||
@ -55,7 +58,7 @@ def build_ArmComputeLibrary() -> None:
|
||||
shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
|
||||
|
||||
|
||||
def update_wheel(wheel_path, desired_cuda) -> None:
|
||||
def update_wheel(wheel_path) -> None:
|
||||
"""
|
||||
Update the cuda wheel libraries
|
||||
"""
|
||||
@ -77,6 +80,7 @@ def update_wheel(wheel_path, desired_cuda) -> None:
|
||||
"/usr/local/cuda/lib64/libnvToolsExt.so.1",
|
||||
"/usr/local/cuda/lib64/libnvJitLink.so.12",
|
||||
"/usr/local/cuda/lib64/libnvrtc.so.12",
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
|
||||
"/usr/local/cuda/lib64/libcudnn_adv.so.9",
|
||||
"/usr/local/cuda/lib64/libcudnn_cnn.so.9",
|
||||
"/usr/local/cuda/lib64/libcudnn_graph.so.9",
|
||||
@ -96,18 +100,6 @@ def update_wheel(wheel_path, desired_cuda) -> None:
|
||||
"/usr/local/lib/libnvpl_lapack_core.so.0",
|
||||
"/usr/local/lib/libnvpl_blas_core.so.0",
|
||||
]
|
||||
if "126" in desired_cuda:
|
||||
libs_to_copy += [
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.6",
|
||||
"/usr/local/cuda/lib64/libcufile.so.0",
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
||||
]
|
||||
elif "128" in desired_cuda:
|
||||
libs_to_copy += [
|
||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.8",
|
||||
"/usr/local/cuda/lib64/libcufile.so.0",
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
||||
]
|
||||
else:
|
||||
libs_to_copy += [
|
||||
"/opt/OpenBLAS/lib/libopenblas.so.0",
|
||||
@ -136,9 +128,6 @@ def complete_wheel(folder: str) -> str:
|
||||
"""
|
||||
wheel_name = list_dir(f"/{folder}/dist")[0]
|
||||
|
||||
# Please note for cuda we don't run auditwheel since we use custom script to package
|
||||
# the cuda dependencies to the wheel file using update_wheel() method.
|
||||
# However we need to make sure filename reflects the correct Manylinux platform.
|
||||
if "pytorch" in folder and not enable_cuda:
|
||||
print("Repairing Wheel with AuditWheel")
|
||||
check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
|
||||
@ -150,14 +139,7 @@ def complete_wheel(folder: str) -> str:
|
||||
f"/{folder}/dist/{repaired_wheel_name}",
|
||||
)
|
||||
else:
|
||||
repaired_wheel_name = wheel_name.replace(
|
||||
"linux_aarch64", "manylinux_2_28_aarch64"
|
||||
)
|
||||
print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
|
||||
os.rename(
|
||||
f"/{folder}/dist/{wheel_name}",
|
||||
f"/{folder}/dist/{repaired_wheel_name}",
|
||||
)
|
||||
repaired_wheel_name = wheel_name
|
||||
|
||||
print(f"Copying {repaired_wheel_name} to artifacts")
|
||||
shutil.copy2(
|
||||
@ -189,22 +171,22 @@ if __name__ == "__main__":
|
||||
args = parse_arguments()
|
||||
enable_mkldnn = args.enable_mkldnn
|
||||
enable_cuda = args.enable_cuda
|
||||
branch = check_output(
|
||||
["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch"
|
||||
).decode()
|
||||
repo = Repository("/pytorch")
|
||||
branch = repo.head.name
|
||||
if branch == "HEAD":
|
||||
branch = "master"
|
||||
|
||||
print("Building PyTorch wheel")
|
||||
build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
|
||||
os.system("cd /pytorch; python setup.py clean")
|
||||
|
||||
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
|
||||
desired_cuda = os.getenv("DESIRED_CUDA")
|
||||
if override_package_version is not None:
|
||||
version = override_package_version
|
||||
build_vars += (
|
||||
f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
|
||||
)
|
||||
elif branch in ["nightly", "main"]:
|
||||
elif branch in ["nightly", "master"]:
|
||||
build_date = (
|
||||
check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
|
||||
.decode()
|
||||
@ -214,11 +196,12 @@ if __name__ == "__main__":
|
||||
check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
|
||||
)
|
||||
if enable_cuda:
|
||||
desired_cuda = os.getenv("DESIRED_CUDA")
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 "
|
||||
else:
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
|
||||
elif branch.startswith(("v1.", "v2.")):
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
|
||||
|
||||
if enable_mkldnn:
|
||||
build_ArmComputeLibrary()
|
||||
@ -242,6 +225,6 @@ if __name__ == "__main__":
|
||||
print("Updating Cuda Dependency")
|
||||
filename = os.listdir("/pytorch/dist/")
|
||||
wheel_path = f"/pytorch/dist/{filename[0]}"
|
||||
update_wheel(wheel_path, desired_cuda)
|
||||
update_wheel(wheel_path)
|
||||
pytorch_wheel_name = complete_wheel("/pytorch/")
|
||||
print(f"Build Complete. Created {pytorch_wheel_name}..")
|
||||
|
||||
@ -12,22 +12,22 @@ import os
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
from typing import Optional, Union
|
||||
from typing import Dict, List, Optional, Tuple, Union
|
||||
|
||||
import boto3
|
||||
|
||||
|
||||
# AMI images for us-east-1, change the following based on your ~/.aws/config
|
||||
os_amis = {
|
||||
"ubuntu18_04": "ami-078eece1d8119409f", # login_name: ubuntu
|
||||
"ubuntu20_04": "ami-052eac90edaa9d08f", # login_name: ubuntu
|
||||
"ubuntu22_04": "ami-0c6c29c5125214c77", # login_name: ubuntu
|
||||
"redhat8": "ami-0698b90665a2ddcf1", # login_name: ec2-user
|
||||
}
|
||||
|
||||
ubuntu20_04_ami = os_amis["ubuntu20_04"]
|
||||
ubuntu18_04_ami = os_amis["ubuntu18_04"]
|
||||
|
||||
|
||||
def compute_keyfile_path(key_name: Optional[str] = None) -> tuple[str, str]:
|
||||
def compute_keyfile_path(key_name: Optional[str] = None) -> Tuple[str, str]:
|
||||
if key_name is None:
|
||||
key_name = os.getenv("AWS_KEY_NAME")
|
||||
if key_name is None:
|
||||
@ -57,7 +57,7 @@ def ec2_instances_by_id(instance_id):
|
||||
|
||||
|
||||
def start_instance(
|
||||
key_name, ami=ubuntu20_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50
|
||||
key_name, ami=ubuntu18_04_ami, instance_type="t4g.2xlarge", ebs_size: int = 50
|
||||
):
|
||||
inst = ec2.create_instances(
|
||||
ImageId=ami,
|
||||
@ -96,7 +96,7 @@ class RemoteHost:
|
||||
self.keyfile_path = keyfile_path
|
||||
self.login_name = login_name
|
||||
|
||||
def _gen_ssh_prefix(self) -> list[str]:
|
||||
def _gen_ssh_prefix(self) -> List[str]:
|
||||
return [
|
||||
"ssh",
|
||||
"-o",
|
||||
@ -108,13 +108,13 @@ class RemoteHost:
|
||||
]
|
||||
|
||||
@staticmethod
|
||||
def _split_cmd(args: Union[str, list[str]]) -> list[str]:
|
||||
def _split_cmd(args: Union[str, List[str]]) -> List[str]:
|
||||
return args.split() if isinstance(args, str) else args
|
||||
|
||||
def run_ssh_cmd(self, args: Union[str, list[str]]) -> None:
|
||||
def run_ssh_cmd(self, args: Union[str, List[str]]) -> None:
|
||||
subprocess.check_call(self._gen_ssh_prefix() + self._split_cmd(args))
|
||||
|
||||
def check_ssh_output(self, args: Union[str, list[str]]) -> str:
|
||||
def check_ssh_output(self, args: Union[str, List[str]]) -> str:
|
||||
return subprocess.check_output(
|
||||
self._gen_ssh_prefix() + self._split_cmd(args)
|
||||
).decode("utf-8")
|
||||
@ -157,7 +157,7 @@ class RemoteHost:
|
||||
def using_docker(self) -> bool:
|
||||
return self.container_id is not None
|
||||
|
||||
def run_cmd(self, args: Union[str, list[str]]) -> None:
|
||||
def run_cmd(self, args: Union[str, List[str]]) -> None:
|
||||
if not self.using_docker():
|
||||
return self.run_ssh_cmd(args)
|
||||
assert self.container_id is not None
|
||||
@ -178,7 +178,7 @@ class RemoteHost:
|
||||
if rc != 0:
|
||||
raise subprocess.CalledProcessError(rc, docker_cmd)
|
||||
|
||||
def check_output(self, args: Union[str, list[str]]) -> str:
|
||||
def check_output(self, args: Union[str, List[str]]) -> str:
|
||||
if not self.using_docker():
|
||||
return self.check_ssh_output(args)
|
||||
assert self.container_id is not None
|
||||
@ -230,7 +230,7 @@ class RemoteHost:
|
||||
)
|
||||
self.download_file(remote_file, local_file)
|
||||
|
||||
def list_dir(self, path: str) -> list[str]:
|
||||
def list_dir(self, path: str) -> List[str]:
|
||||
return self.check_output(["ls", "-1", path]).split("\n")
|
||||
|
||||
|
||||
@ -327,7 +327,7 @@ def build_ArmComputeLibrary(host: RemoteHost, git_clone_flags: str = "") -> None
|
||||
]
|
||||
)
|
||||
host.run_cmd(
|
||||
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v25.02 {git_clone_flags}"
|
||||
f"git clone https://github.com/ARM-software/ComputeLibrary.git -b v24.09 {git_clone_flags}"
|
||||
)
|
||||
|
||||
host.run_cmd(f"cd ComputeLibrary && scons Werror=1 -j8 {acl_build_flags}")
|
||||
@ -358,7 +358,7 @@ def checkout_repo(
|
||||
branch: str = "main",
|
||||
url: str,
|
||||
git_clone_flags: str,
|
||||
mapping: dict[str, tuple[str, str]],
|
||||
mapping: Dict[str, Tuple[str, str]],
|
||||
) -> Optional[str]:
|
||||
for prefix in mapping:
|
||||
if not branch.startswith(prefix):
|
||||
@ -619,11 +619,9 @@ def build_torchaudio(
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
|
||||
host.run_cmd(
|
||||
f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
|
||||
host.run_cmd(f"cd audio && export FFMPEG_ROOT=$(pwd)/third_party/ffmpeg && export USE_FFMPEG=1 \
|
||||
&& ./packaging/ffmpeg/build.sh \
|
||||
&& {build_vars} python3 setup.py bdist_wheel"
|
||||
)
|
||||
&& {build_vars} python3 setup.py bdist_wheel")
|
||||
|
||||
wheel_name = host.list_dir("audio/dist")[0]
|
||||
embed_libgomp(host, use_conda, os.path.join("audio", "dist", wheel_name))
|
||||
@ -657,6 +655,18 @@ def configure_system(
|
||||
"sudo apt-get install -y python3-dev python3-yaml python3-setuptools python3-wheel python3-pip"
|
||||
)
|
||||
host.run_cmd("pip3 install dataclasses typing-extensions")
|
||||
# Install and switch to gcc-8 on Ubuntu-18.04
|
||||
if not host.using_docker() and host.ami == ubuntu18_04_ami and compiler == "gcc-8":
|
||||
host.run_cmd("sudo apt-get install -y g++-8 gfortran-8")
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-8 100"
|
||||
)
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-8 100"
|
||||
)
|
||||
host.run_cmd(
|
||||
"sudo update-alternatives --install /usr/bin/gfortran gfortran /usr/bin/gfortran-8 100"
|
||||
)
|
||||
if not use_conda:
|
||||
print("Installing Cython + numpy from PyPy")
|
||||
host.run_cmd("sudo pip3 install Cython")
|
||||
@ -669,7 +679,7 @@ def build_domains(
|
||||
branch: str = "main",
|
||||
use_conda: bool = True,
|
||||
git_clone_flags: str = "",
|
||||
) -> tuple[str, str, str, str]:
|
||||
) -> Tuple[str, str, str, str]:
|
||||
vision_wheel_name = build_torchvision(
|
||||
host, branch=branch, use_conda=use_conda, git_clone_flags=git_clone_flags
|
||||
)
|
||||
@ -696,7 +706,7 @@ def start_build(
|
||||
pytorch_build_number: Optional[str] = None,
|
||||
shallow_clone: bool = True,
|
||||
enable_mkldnn: bool = False,
|
||||
) -> tuple[str, str, str, str, str]:
|
||||
) -> Tuple[str, str, str, str, str]:
|
||||
git_clone_flags = " --depth 1 --shallow-submodules" if shallow_clone else ""
|
||||
if host.using_docker() and not use_conda:
|
||||
print("Auto-selecting conda option for docker images")
|
||||
@ -747,7 +757,7 @@ def start_build(
|
||||
version = host.check_output("cat pytorch/version.txt").strip()[:-2]
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1"
|
||||
if branch.startswith(("v1.", "v2.")):
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
|
||||
build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1"
|
||||
if host.using_docker():
|
||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||
if enable_mkldnn:
|
||||
@ -920,9 +930,9 @@ def parse_arguments():
|
||||
parser.add_argument("--debug", action="store_true")
|
||||
parser.add_argument("--build-only", action="store_true")
|
||||
parser.add_argument("--test-only", type=str)
|
||||
group = parser.add_mutually_exclusive_group()
|
||||
group.add_argument("--os", type=str, choices=list(os_amis.keys()))
|
||||
group.add_argument("--ami", type=str)
|
||||
parser.add_argument(
|
||||
"--os", type=str, choices=list(os_amis.keys()), default="ubuntu20_04"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--python-version",
|
||||
type=str,
|
||||
@ -952,13 +962,7 @@ def parse_arguments():
|
||||
|
||||
if __name__ == "__main__":
|
||||
args = parse_arguments()
|
||||
ami = (
|
||||
args.ami
|
||||
if args.ami is not None
|
||||
else os_amis[args.os]
|
||||
if args.os is not None
|
||||
else ubuntu20_04_ami
|
||||
)
|
||||
ami = os_amis[args.os]
|
||||
keyfile_path, key_name = compute_keyfile_path(args.key_name)
|
||||
|
||||
if args.list_instances:
|
||||
@ -1012,7 +1016,7 @@ if __name__ == "__main__":
|
||||
install_condaforge_python(host, args.python_version)
|
||||
sys.exit(0)
|
||||
|
||||
python_version = args.python_version if args.python_version is not None else "3.9"
|
||||
python_version = args.python_version if args.python_version is not None else "3.8"
|
||||
|
||||
if args.use_torch_from_pypi:
|
||||
configure_system(host, compiler=args.compiler, python_version=python_version)
|
||||
|
||||
@ -44,8 +44,6 @@ FROM base as cuda
|
||||
ARG CUDA_VERSION=12.4
|
||||
RUN rm -rf /usr/local/cuda-*
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
|
||||
# Preserve CUDA_VERSION for the builds
|
||||
ENV CUDA_VERSION=${CUDA_VERSION}
|
||||
|
||||
@ -1,60 +1,82 @@
|
||||
#!/usr/bin/env bash
|
||||
# Script used only in CD pipeline
|
||||
|
||||
set -exou pipefail
|
||||
set -eou pipefail
|
||||
|
||||
image="$1"
|
||||
shift
|
||||
|
||||
if [ -z "${image}" ]; then
|
||||
echo "Usage: $0 IMAGENAME:ARCHTAG"
|
||||
echo "Usage: $0 IMAGE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Go from imagename:tag to tag
|
||||
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
|
||||
DOCKER_IMAGE_NAME="pytorch/${image}"
|
||||
|
||||
CUDA_VERSION=""
|
||||
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
|
||||
# extract cuda version from image name and tag. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
|
||||
CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
|
||||
fi
|
||||
|
||||
case ${DOCKER_TAG_PREFIX} in
|
||||
cpu)
|
||||
BASE_TARGET=base
|
||||
;;
|
||||
cuda*)
|
||||
BASE_TARGET=cuda${CUDA_VERSION}
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
|
||||
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
|
||||
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
|
||||
export DOCKER_BUILDKIT=1
|
||||
TOPDIR=$(git rev-parse --show-toplevel)
|
||||
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
docker build \
|
||||
--target final \
|
||||
--progress plain \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "DEVTOOLSET_VERSION=11" \
|
||||
-t ${tmp_tag} \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
|
||||
${TOPDIR}/.ci/docker/
|
||||
CUDA_VERSION=${CUDA_VERSION:-12.1}
|
||||
|
||||
if [ -n "${CUDA_VERSION}" ]; then
|
||||
case ${CUDA_VERSION} in
|
||||
cpu)
|
||||
BASE_TARGET=base
|
||||
DOCKER_TAG=cpu
|
||||
;;
|
||||
all)
|
||||
BASE_TARGET=all_cuda
|
||||
DOCKER_TAG=latest
|
||||
;;
|
||||
*)
|
||||
BASE_TARGET=cuda${CUDA_VERSION}
|
||||
DOCKER_TAG=cuda${CUDA_VERSION}
|
||||
;;
|
||||
esac
|
||||
|
||||
|
||||
(
|
||||
set -x
|
||||
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
|
||||
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
|
||||
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
|
||||
docker build \
|
||||
--target final \
|
||||
--progress plain \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "DEVTOOLSET_VERSION=11" \
|
||||
-t ${DOCKER_IMAGE_NAME} \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
|
||||
${TOPDIR}/.ci/docker/
|
||||
)
|
||||
|
||||
if [[ "${DOCKER_TAG}" =~ ^cuda* ]]; then
|
||||
# Test that we're using the right CUDA compiler
|
||||
docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
|
||||
(
|
||||
set -x
|
||||
docker run --rm "${DOCKER_IMAGE_NAME}" nvcc --version | grep "cuda_${CUDA_VERSION}"
|
||||
)
|
||||
fi
|
||||
|
||||
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
|
||||
GIT_BRANCH_NAME=${GITHUB_REF##*/}
|
||||
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
|
||||
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE_NAME}-${GIT_BRANCH_NAME}
|
||||
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE_NAME}-${GIT_COMMIT_SHA}
|
||||
if [[ "${WITH_PUSH:-}" == true ]]; then
|
||||
(
|
||||
set -x
|
||||
docker push "${DOCKER_IMAGE_NAME}"
|
||||
if [[ -n ${GITHUB_REF} ]]; then
|
||||
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_BRANCH_TAG}
|
||||
docker tag ${DOCKER_IMAGE_NAME} ${DOCKER_IMAGE_SHA_TAG}
|
||||
docker push "${DOCKER_IMAGE_BRANCH_TAG}"
|
||||
docker push "${DOCKER_IMAGE_SHA_TAG}"
|
||||
fi
|
||||
)
|
||||
fi
|
||||
|
||||
5
.ci/docker/aotriton_version.txt
Normal file
5
.ci/docker/aotriton_version.txt
Normal file
@ -0,0 +1,5 @@
|
||||
0.7b
|
||||
manylinux_2_17
|
||||
rocm6.2
|
||||
9be04068c3c0857a4cfd17d7e39e71d0423ebac2
|
||||
3e9e1959d23b93d78a08fcc5f868125dc3854dece32fd9458be9ef4467982291
|
||||
@ -1,8 +1,4 @@
|
||||
#!/bin/bash
|
||||
# The purpose of this script is to:
|
||||
# 1. Extract the set of parameters to be used for a docker build based on the provided image name.
|
||||
# 2. Run docker build with the parameters found in step 1.
|
||||
# 3. Run the built image and print out the expected and actual versions of packages installed.
|
||||
|
||||
set -ex
|
||||
|
||||
@ -90,21 +86,32 @@ CMAKE_VERSION=3.18.5
|
||||
|
||||
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
|
||||
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
|
||||
if [[ "$image" == *rocm* ]]; then
|
||||
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
|
||||
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
|
||||
fi
|
||||
|
||||
# It's annoying to rename jobs every time you want to rewrite a
|
||||
# configuration, so we hardcode everything here rather than do it
|
||||
# from scratch
|
||||
case "$image" in
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11)
|
||||
CUDA_VERSION=12.6.3
|
||||
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.4.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.1.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -118,6 +125,37 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.1.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.1.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -132,75 +170,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.4.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.6-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||
CUDA_VERSION=12.6.3
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.13
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -215,6 +185,49 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.4.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.1.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9)
|
||||
CUDA_VERSION=12.4.1
|
||||
CUDNN_VERSION=9
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
@ -226,6 +239,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
ONNX=yes
|
||||
@ -234,7 +248,10 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -242,7 +259,10 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.11
|
||||
CLANG_VERSION=10
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
VULKAN_SDK_VERSION=1.2.162.1
|
||||
SWIFTSHADER=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -250,42 +270,38 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-focal-rocm-n-1-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=6.2.4
|
||||
ROCM_VERSION=6.1
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-focal-rocm-n-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
GCC_VERSION=9
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
ROCM_VERSION=6.3
|
||||
ROCM_VERSION=6.2
|
||||
NINJA_VERSION=1.9.0
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
KATEX=yes
|
||||
UCX_COMMIT=${_UCX_COMMIT}
|
||||
UCC_COMMIT=${_UCC_COMMIT}
|
||||
INDUCTOR_BENCHMARKS=yes
|
||||
;;
|
||||
pytorch-linux-jammy-xpu-2024.0-py3)
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
XPU_VERSION=0.5
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -296,6 +312,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
XPU_VERSION=2025.0
|
||||
NINJA_VERSION=1.9.0
|
||||
@ -306,6 +323,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
@ -319,6 +337,7 @@ case "$image" in
|
||||
CUDNN_VERSION=9
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
TRITON=yes
|
||||
;;
|
||||
@ -326,6 +345,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CLANG_VERSION=12
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
TRITON=yes
|
||||
@ -346,6 +366,7 @@ case "$image" in
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
GCC_VERSION=11
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
KATEX=yes
|
||||
CONDA_CMAKE=yes
|
||||
@ -360,7 +381,7 @@ case "$image" in
|
||||
EXECUTORCH=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.12-halide)
|
||||
CUDA_VERSION=12.6
|
||||
CUDA_VERSION=12.4
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
CONDA_CMAKE=yes
|
||||
@ -368,7 +389,7 @@ case "$image" in
|
||||
TRITON=yes
|
||||
;;
|
||||
pytorch-linux-jammy-py3.12-triton-cpu)
|
||||
CUDA_VERSION=12.6
|
||||
CUDA_VERSION=12.4
|
||||
ANACONDA_PYTHON_VERSION=3.12
|
||||
GCC_VERSION=11
|
||||
CONDA_CMAKE=yes
|
||||
@ -378,19 +399,20 @@ case "$image" in
|
||||
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||
# We will need to update mypy version eventually, but that's for another day. The task
|
||||
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||
PYTHON_VERSION=3.9
|
||||
PIP_CMAKE=yes
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-cuda11.8-cudnn9-py3.9-linter)
|
||||
PYTHON_VERSION=3.9
|
||||
ANACONDA_PYTHON_VERSION=3.9
|
||||
CUDA_VERSION=11.8
|
||||
PIP_CMAKE=yes
|
||||
CONDA_CMAKE=yes
|
||||
;;
|
||||
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||
ANACONDA_PYTHON_VERSION=3.10
|
||||
GCC_VERSION=11
|
||||
ACL=yes
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
# snadampal: skipping llvm src build install because the current version
|
||||
@ -402,6 +424,7 @@ case "$image" in
|
||||
GCC_VERSION=11
|
||||
ACL=yes
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
CONDA_CMAKE=yes
|
||||
# snadampal: skipping llvm src build install because the current version
|
||||
@ -412,6 +435,7 @@ case "$image" in
|
||||
*)
|
||||
# Catch-all for builds that are not hardcoded.
|
||||
PROTOBUF=yes
|
||||
DB=yes
|
||||
VISION=yes
|
||||
echo "image '$image' did not match an existing build configuration"
|
||||
if [[ "$image" == *py* ]]; then
|
||||
@ -460,21 +484,14 @@ if [[ "$image" == *cuda* && ${OS} == "ubuntu" ]]; then
|
||||
fi
|
||||
fi
|
||||
|
||||
no_cache_flag=""
|
||||
progress_flag=""
|
||||
# Do not use cache and progress=plain when in CI
|
||||
if [[ -n "${CI:-}" ]]; then
|
||||
no_cache_flag="--no-cache"
|
||||
progress_flag="--progress=plain"
|
||||
fi
|
||||
|
||||
# Build image
|
||||
docker build \
|
||||
${no_cache_flag} \
|
||||
${progress_flag} \
|
||||
--no-cache \
|
||||
--progress=plain \
|
||||
--build-arg "BUILD_ENVIRONMENT=${image}" \
|
||||
--build-arg "PROTOBUF=${PROTOBUF:-}" \
|
||||
--build-arg "LLVMDEV=${LLVMDEV:-}" \
|
||||
--build-arg "DB=${DB:-}" \
|
||||
--build-arg "VISION=${VISION:-}" \
|
||||
--build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
|
||||
--build-arg "CENTOS_VERSION=${CENTOS_VERSION}" \
|
||||
@ -482,22 +499,22 @@ docker build \
|
||||
--build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
|
||||
--build-arg "CLANG_VERSION=${CLANG_VERSION}" \
|
||||
--build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
|
||||
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
|
||||
--build-arg "GCC_VERSION=${GCC_VERSION}" \
|
||||
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
|
||||
--build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
|
||||
--build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
|
||||
--build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
|
||||
--build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
|
||||
--build-arg "SWIFTSHADER=${SWIFTSHADER}" \
|
||||
--build-arg "CMAKE_VERSION=${CMAKE_VERSION:-}" \
|
||||
--build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
|
||||
--build-arg "KATEX=${KATEX:-}" \
|
||||
--build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
|
||||
--build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906;gfx90a}" \
|
||||
--build-arg "IMAGE_NAME=${IMAGE_NAME}" \
|
||||
--build-arg "UCX_COMMIT=${UCX_COMMIT}" \
|
||||
--build-arg "UCC_COMMIT=${UCC_COMMIT}" \
|
||||
--build-arg "CONDA_CMAKE=${CONDA_CMAKE}" \
|
||||
--build-arg "PIP_CMAKE=${PIP_CMAKE}" \
|
||||
--build-arg "TRITON=${TRITON}" \
|
||||
--build-arg "TRITON_CPU=${TRITON_CPU}" \
|
||||
--build-arg "ONNX=${ONNX}" \
|
||||
@ -523,7 +540,7 @@ docker build \
|
||||
UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')
|
||||
|
||||
function drun() {
|
||||
docker run --rm "$tmp_tag" "$@"
|
||||
docker run --rm "$tmp_tag" $*
|
||||
}
|
||||
|
||||
if [[ "$OS" == "ubuntu" ]]; then
|
||||
@ -571,14 +588,3 @@ if [ -n "$KATEX" ]; then
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
HAS_TRITON=$(drun python -c "import triton" > /dev/null 2>&1 && echo "yes" || echo "no")
|
||||
if [[ -n "$TRITON" || -n "$TRITON_CPU" ]]; then
|
||||
if [ "$HAS_TRITON" = "no" ]; then
|
||||
echo "expecting triton to be installed, but it is not"
|
||||
exit 1
|
||||
fi
|
||||
elif [ "$HAS_TRITON" = "yes" ]; then
|
||||
echo "expecting triton to not be installed, but it is"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@ -55,6 +55,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
@ -68,7 +75,7 @@ COPY ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh
|
||||
RUN rm install_rocm.sh
|
||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||
RUN bash ./install_rocm_magma.sh
|
||||
RUN rm install_rocm_magma.sh
|
||||
COPY ./common/install_amdsmi.sh install_amdsmi.sh
|
||||
RUN bash ./install_amdsmi.sh
|
||||
@ -106,6 +113,13 @@ COPY triton_version.txt triton_version.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
|
||||
|
||||
# Install AOTriton (Early fail)
|
||||
COPY ./aotriton_version.txt aotriton_version.txt
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./common/install_aotriton.sh install_aotriton.sh
|
||||
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
|
||||
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
|
||||
|
||||
# Install ccache/sccache (do this last, so we get priority in PATH)
|
||||
COPY ./common/install_cache.sh install_cache.sh
|
||||
ENV PATH /opt/cache/bin:$PATH
|
||||
|
||||
@ -1 +1 @@
|
||||
381ae5d57d35c165d98df728380b20fbde350392
|
||||
6f638937d64e3396793956d75ee3e14802022745
|
||||
|
||||
@ -1 +0,0 @@
|
||||
v2.21.5-1
|
||||
@ -1 +0,0 @@
|
||||
v2.26.2-1
|
||||
@ -1 +1 @@
|
||||
5d535d7a2d4b435b1b5c1177fd8f04a12b942b9a
|
||||
ac3470188b914c5d7a5058a7e28b9eb685a62427
|
||||
|
||||
@ -1 +1 @@
|
||||
0bcc8265e677e5321606a3311bf71470f14456a8
|
||||
91b14bf5593cf58a8541f3e6b9125600a867d4ef
|
||||
|
||||
@ -1 +1 @@
|
||||
96316ce50fade7e209553aba4898cd9b82aab83b
|
||||
cf34004b8a67d290a962da166f5aa2fc66751326
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
set -euo pipefail
|
||||
|
||||
readonly version=v25.02
|
||||
readonly src_host=https://github.com/ARM-software
|
||||
readonly version=v24.04
|
||||
readonly src_host=https://review.mlplatform.org/ml
|
||||
readonly src_repo=ComputeLibrary
|
||||
|
||||
# Clone ACL
|
||||
|
||||
23
.ci/docker/common/install_aotriton.sh
Executable file
23
.ci/docker/common/install_aotriton.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
TARBALL='aotriton.tar.gz'
|
||||
# This read command alwasy returns with exit code 1
|
||||
read -d "\n" VER MANYLINUX ROCMBASE PINNED_COMMIT SHA256 < aotriton_version.txt || true
|
||||
ARCH=$(uname -m)
|
||||
AOTRITON_INSTALL_PREFIX="$1"
|
||||
AOTRITON_URL="https://github.com/ROCm/aotriton/releases/download/${VER}/aotriton-${VER}-${MANYLINUX}_${ARCH}-${ROCMBASE}-shared.tar.gz"
|
||||
|
||||
cd "${AOTRITON_INSTALL_PREFIX}"
|
||||
# Must use -L to follow redirects
|
||||
curl -L --retry 3 -o "${TARBALL}" "${AOTRITON_URL}"
|
||||
ACTUAL_SHA256=$(sha256sum "${TARBALL}" | cut -d " " -f 1)
|
||||
if [ "${SHA256}" != "${ACTUAL_SHA256}" ]; then
|
||||
echo -n "Error: The SHA256 of downloaded tarball is ${ACTUAL_SHA256},"
|
||||
echo " which does not match the expected value ${SHA256}."
|
||||
exit
|
||||
fi
|
||||
tar xf "${TARBALL}" && rm -rf "${TARBALL}"
|
||||
@ -32,12 +32,8 @@ install_ubuntu() {
|
||||
|
||||
# HACK: UCC testing relies on libnccl library from NVIDIA repo, and version 2.16 crashes
|
||||
# See https://github.com/pytorch/pytorch/pull/105260#issuecomment-1673399729
|
||||
# TODO: Eliminate this hack, we should not relay on apt-get installation
|
||||
# See https://github.com/pytorch/pytorch/issues/144768
|
||||
if [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "11.8"* ]]; then
|
||||
maybe_libnccl_dev="libnccl2=2.15.5-1+cuda11.8 libnccl-dev=2.15.5-1+cuda11.8 --allow-downgrades --allow-change-held-packages"
|
||||
elif [[ "$UBUNTU_VERSION" == "20.04"* && "$CUDA_VERSION" == "12.4"* ]]; then
|
||||
maybe_libnccl_dev="libnccl2=2.26.2-1+cuda12.4 libnccl-dev=2.26.2-1+cuda12.4 --allow-downgrades --allow-change-held-packages"
|
||||
else
|
||||
maybe_libnccl_dev=""
|
||||
fi
|
||||
@ -80,8 +76,7 @@ install_ubuntu() {
|
||||
vim \
|
||||
unzip \
|
||||
gpg-agent \
|
||||
gdb \
|
||||
bc
|
||||
gdb
|
||||
|
||||
# Should resolve issues related to various apt package repository cert issues
|
||||
# see: https://github.com/pytorch/pytorch/issues/65931
|
||||
|
||||
@ -9,7 +9,12 @@ install_ubuntu() {
|
||||
# Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
|
||||
apt-get install -y cargo
|
||||
echo "Checking out sccache repo"
|
||||
git clone https://github.com/mozilla/sccache -b v0.9.1
|
||||
if [ -n "$CUDA_VERSION" ]; then
|
||||
# TODO: Remove this
|
||||
git clone https://github.com/pytorch/sccache
|
||||
else
|
||||
git clone https://github.com/mozilla/sccache -b v0.8.2
|
||||
fi
|
||||
cd sccache
|
||||
echo "Building sccache"
|
||||
cargo build --release
|
||||
@ -36,33 +41,41 @@ sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
|
||||
export PATH="/opt/cache/bin:$PATH"
|
||||
|
||||
# Setup compiler cache
|
||||
install_ubuntu
|
||||
if [ -n "$ROCM_VERSION" ]; then
|
||||
curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
|
||||
else
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
if [ -n "$CUDA_VERSION" ]; then
|
||||
# TODO: Install the pre-built binary from S3 as building from source
|
||||
# https://github.com/pytorch/sccache has started failing mysteriously
|
||||
# in which sccache server couldn't start with the following error:
|
||||
# sccache: error: Invalid argument (os error 22)
|
||||
install_binary
|
||||
else
|
||||
install_ubuntu
|
||||
fi
|
||||
fi
|
||||
chmod a+x /opt/cache/bin/sccache
|
||||
|
||||
function write_sccache_stub() {
|
||||
# Unset LD_PRELOAD for ps because of asan + ps issues
|
||||
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
|
||||
if [ $1 == "gcc" ]; then
|
||||
# Do not call sccache recursively when dumping preprocessor argument
|
||||
# For some reason it's very important for the first cached nvcc invocation
|
||||
cat >"/opt/cache/bin/$1" <<EOF
|
||||
# Do not call sccache recursively when dumping preprocessor argument
|
||||
# For some reason it's very important for the first cached nvcc invocation
|
||||
cat > "/opt/cache/bin/$1" <<EOF
|
||||
#!/bin/sh
|
||||
|
||||
# sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
|
||||
for arg in "\$@"; do
|
||||
if [ "\$arg" = "-E" ]; then
|
||||
exec $(which $1) "\$@"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
|
||||
if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
|
||||
exec $(which $1) "\$@"
|
||||
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
|
||||
exec sccache $(which $1) "\$@"
|
||||
else
|
||||
exec $(which $1) "\$@"
|
||||
fi
|
||||
EOF
|
||||
else
|
||||
cat >"/opt/cache/bin/$1" <<EOF
|
||||
cat > "/opt/cache/bin/$1" <<EOF
|
||||
#!/bin/sh
|
||||
|
||||
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
|
||||
@ -112,7 +125,7 @@ if [ -n "$ROCM_VERSION" ]; then
|
||||
TOPDIR=$(dirname $OLDCOMP)
|
||||
WRAPPED="$TOPDIR/original/$COMPNAME"
|
||||
mv "$OLDCOMP" "$WRAPPED"
|
||||
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" >"$OLDCOMP"
|
||||
printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" > "$OLDCOMP"
|
||||
chmod a+x "$OLDCOMP"
|
||||
}
|
||||
|
||||
|
||||
@ -4,10 +4,16 @@ set -ex
|
||||
|
||||
if [ -n "$CLANG_VERSION" ]; then
|
||||
|
||||
if [[ $UBUNTU_VERSION == 22.04 ]]; then
|
||||
if [[ $CLANG_VERSION == 9 && $UBUNTU_VERSION == 18.04 ]]; then
|
||||
sudo apt-get update
|
||||
# gpg-agent is not available by default on 18.04
|
||||
sudo apt-get install -y --no-install-recommends gpg-agent
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-${CLANG_VERSION} main"
|
||||
elif [[ $UBUNTU_VERSION == 22.04 ]]; then
|
||||
# work around ubuntu apt-get conflicts
|
||||
sudo apt-get -y -f install
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
|
||||
if [[ $CLANG_VERSION == 18 ]]; then
|
||||
apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
|
||||
fi
|
||||
@ -35,7 +41,7 @@ if [ -n "$CLANG_VERSION" ]; then
|
||||
# clang's packaging is a little messed up (the runtime libs aren't
|
||||
# added into the linker path), so give it a little help
|
||||
clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
|
||||
echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
|
||||
echo "$clang_lib" > /etc/ld.so.conf.d/clang.conf
|
||||
ldconfig
|
||||
|
||||
# Cleanup package manager
|
||||
|
||||
@ -25,8 +25,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
mkdir -p /opt/conda
|
||||
chown jenkins:jenkins /opt/conda
|
||||
|
||||
SCRIPT_FOLDER="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
source "${SCRIPT_FOLDER}/common_utils.sh"
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
pushd /tmp
|
||||
wget -q "${BASE_URL}/${CONDA_FILE}"
|
||||
@ -62,11 +61,11 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
|
||||
# libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
|
||||
# which is provided in libstdcxx 12 and up.
|
||||
conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge
|
||||
conda_install libstdcxx-ng=12.3.0 -c conda-forge
|
||||
|
||||
# Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
|
||||
if [[ $(uname -m) == "aarch64" ]]; then
|
||||
conda_install "openblas==0.3.29=*openmp*"
|
||||
conda_install "openblas==0.3.25=*openmp*"
|
||||
else
|
||||
conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
|
||||
fi
|
||||
@ -85,9 +84,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
||||
|
||||
# Magma package names are concatenation of CUDA major and minor ignoring revision
|
||||
# I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
|
||||
# Magma is installed from a tarball in the ossci-linux bucket into the conda env
|
||||
if [ -n "$CUDA_VERSION" ]; then
|
||||
${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION}) ${ANACONDA_PYTHON_VERSION}
|
||||
conda_install magma-cuda$(TMP=${CUDA_VERSION/./};echo ${TMP%.*[0-9]}) -c pytorch
|
||||
fi
|
||||
|
||||
# Install some other packages, including those needed for Python test reporting
|
||||
|
||||
@ -7,7 +7,7 @@ PYTHON_DOWNLOAD_GITHUB_BRANCH=https://github.com/python/cpython/archive/refs/hea
|
||||
GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
|
||||
|
||||
# Python versions to be installed in /opt/$VERSION_NO
|
||||
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}
|
||||
CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.8.1 3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t"}
|
||||
|
||||
function check_var {
|
||||
if [ -z "$1" ]; then
|
||||
@ -70,7 +70,7 @@ function do_cpython_build {
|
||||
# install setuptools since python 3.12 is required to use distutils
|
||||
${prefix}/bin/pip install wheel==0.34.2 setuptools==68.2.2
|
||||
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
|
||||
ln -sf ${prefix} /opt/python/${abi_tag}
|
||||
ln -s ${prefix} /opt/python/${abi_tag}
|
||||
}
|
||||
|
||||
function build_cpython {
|
||||
|
||||
@ -2,6 +2,7 @@
|
||||
|
||||
set -ex
|
||||
|
||||
NCCL_VERSION=v2.21.5-1
|
||||
CUDNN_VERSION=9.5.1.17
|
||||
|
||||
function install_cusparselt_040 {
|
||||
@ -15,6 +16,17 @@ function install_cusparselt_040 {
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_cusparselt_052 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_cusparselt_062 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
@ -26,20 +38,9 @@ function install_cusparselt_062 {
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_cusparselt_063 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_118 {
|
||||
CUDNN_VERSION=9.1.0.70
|
||||
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.4.0"
|
||||
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
|
||||
rm -rf /usr/local/cuda-11.8 /usr/local/cuda
|
||||
# install CUDA 11.8.0 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run
|
||||
@ -57,16 +58,56 @@ function install_118 {
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
CUDA_VERSION=11.8 bash install_nccl.sh
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_040
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function install_121 {
|
||||
echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
|
||||
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
|
||||
# install CUDA 12.1.0 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
|
||||
chmod +x cuda_12.1.1_530.30.02_linux.run
|
||||
./cuda_12.1.1_530.30.02_linux.run --toolkit --silent
|
||||
rm -f cuda_12.1.1_530.30.02_linux.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_052
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function install_124 {
|
||||
CUDNN_VERSION=9.1.0.70
|
||||
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.2"
|
||||
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
|
||||
# install CUDA 12.4.1 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux.run
|
||||
@ -84,7 +125,14 @@ function install_124 {
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
CUDA_VERSION=12.4 bash install_nccl.sh
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_062
|
||||
|
||||
@ -92,13 +140,13 @@ function install_124 {
|
||||
}
|
||||
|
||||
function install_126 {
|
||||
echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
|
||||
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
|
||||
# install CUDA 12.6.3 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run
|
||||
chmod +x cuda_12.6.3_560.35.05_linux.run
|
||||
./cuda_12.6.3_560.35.05_linux.run --toolkit --silent
|
||||
rm -f cuda_12.6.3_560.35.05_linux.run
|
||||
# install CUDA 12.6.2 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux.run
|
||||
chmod +x cuda_12.6.2_560.35.03_linux.run
|
||||
./cuda_12.6.2_560.35.03_linux.run --toolkit --silent
|
||||
rm -f cuda_12.6.2_560.35.03_linux.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
@ -110,9 +158,16 @@ function install_126 {
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
CUDA_VERSION=12.6 bash install_nccl.sh
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
install_cusparselt_062
|
||||
|
||||
ldconfig
|
||||
}
|
||||
@ -148,6 +203,37 @@ function prune_118 {
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
|
||||
}
|
||||
|
||||
function prune_121 {
|
||||
echo "Pruning CUDA 12.1"
|
||||
#####################################################################################
|
||||
# CUDA 12.1 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.1 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.1/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/
|
||||
}
|
||||
|
||||
function prune_124 {
|
||||
echo "Pruning CUDA 12.4"
|
||||
#####################################################################################
|
||||
@ -216,45 +302,18 @@ function prune_126 {
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
|
||||
}
|
||||
|
||||
function install_128 {
|
||||
CUDNN_VERSION=9.8.0.87
|
||||
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
# install CUDA 12.8.0 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux.run
|
||||
chmod +x cuda_12.8.0_570.86.10_linux.run
|
||||
./cuda_12.8.0_570.86.10_linux.run --toolkit --silent
|
||||
rm -f cuda_12.8.0_570.86.10_linux.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
CUDA_VERSION=12.8 bash install_nccl.sh
|
||||
|
||||
install_cusparselt_063
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
# idiomatic parameter and option handling in sh
|
||||
while test $# -gt 0
|
||||
do
|
||||
case "$1" in
|
||||
11.8) install_118; prune_118
|
||||
;;
|
||||
12.1) install_121; prune_121
|
||||
;;
|
||||
12.4) install_124; prune_124
|
||||
;;
|
||||
12.6) install_126; prune_126
|
||||
;;
|
||||
12.8) install_128;
|
||||
;;
|
||||
*) echo "bad argument $1"; exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
@ -3,28 +3,30 @@
|
||||
|
||||
set -ex
|
||||
|
||||
CUDNN_VERSION=9.8.0.87
|
||||
NCCL_VERSION=v2.21.5-1
|
||||
CUDNN_VERSION=9.5.1.17
|
||||
|
||||
function install_cusparselt_063 {
|
||||
function install_cusparselt_062 {
|
||||
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && pushd tmp_cusparselt
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
|
||||
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
tar xf libcusparse_lt-linux-sbsa-0.6.2.3-archive.tar.xz
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/include/* /usr/local/cuda/include/
|
||||
cp -a libcusparse_lt-linux-sbsa-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf tmp_cusparselt
|
||||
}
|
||||
|
||||
function install_128 {
|
||||
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
|
||||
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
# install CUDA 12.8.0 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.8.0/local_installers/cuda_12.8.0_570.86.10_linux_sbsa.run
|
||||
chmod +x cuda_12.8.0_570.86.10_linux_sbsa.run
|
||||
./cuda_12.8.0_570.86.10_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.8.0_570.86.10_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.8 /usr/local/cuda
|
||||
function install_124 {
|
||||
CUDNN_VERSION=9.1.0.70
|
||||
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.4 /usr/local/cuda
|
||||
# install CUDA 12.4.1 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
chmod +x cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
./cuda_12.4.1_550.54.15_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.4.1_550.54.15_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.4 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
@ -35,18 +37,125 @@ function install_128 {
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
CUDA_VERSION=12.8 bash install_nccl.sh
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_063
|
||||
install_cusparselt_062
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_124 {
|
||||
echo "Pruning CUDA 12.4"
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.4 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.4/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
|
||||
}
|
||||
|
||||
function install_126 {
|
||||
echo "Installing CUDA 12.6.2 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
|
||||
rm -rf /usr/local/cuda-12.6 /usr/local/cuda
|
||||
# install CUDA 12.6.2 in the same container
|
||||
wget -q https://developer.download.nvidia.com/compute/cuda/12.6.2/local_installers/cuda_12.6.2_560.35.03_linux_sbsa.run
|
||||
chmod +x cuda_12.6.2_560.35.03_linux_sbsa.run
|
||||
./cuda_12.6.2_560.35.03_linux_sbsa.run --toolkit --silent
|
||||
rm -f cuda_12.6.2_560.35.03_linux_sbsa.run
|
||||
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda
|
||||
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn && cd tmp_cudnn
|
||||
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
|
||||
cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf tmp_cudnn
|
||||
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
cd nccl && make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
cd ..
|
||||
rm -rf nccl
|
||||
|
||||
install_cusparselt_062
|
||||
|
||||
ldconfig
|
||||
}
|
||||
|
||||
function prune_126 {
|
||||
echo "Pruning CUDA 12.6"
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune static libs
|
||||
#####################################################################################
|
||||
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
|
||||
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
|
||||
|
||||
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||
|
||||
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||
export GENCODE=$OVERRIDE_GENCODE
|
||||
fi
|
||||
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
|
||||
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
|
||||
fi
|
||||
|
||||
# all CUDA libs except CuDNN and CuBLAS
|
||||
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||
| xargs -I {} bash -c \
|
||||
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||
|
||||
# prune CuDNN and CuBLAS
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||
|
||||
#####################################################################################
|
||||
# CUDA 12.6 prune visual tools
|
||||
#####################################################################################
|
||||
export CUDA_BASE="/usr/local/cuda-12.6/"
|
||||
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
|
||||
}
|
||||
|
||||
# idiomatic parameter and option handling in sh
|
||||
while test $# -gt 0
|
||||
do
|
||||
case "$1" in
|
||||
12.8) install_128;
|
||||
12.4) install_124; prune_124
|
||||
;;
|
||||
12.6) install_126; prune_126
|
||||
;;
|
||||
*) echo "bad argument $1"; exit 1
|
||||
;;
|
||||
|
||||
@ -4,9 +4,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
|
||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
||||
mkdir tmp_cudnn
|
||||
pushd tmp_cudnn
|
||||
if [[ ${CUDA_VERSION:0:4} == "12.8" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.8.0.87_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
|
||||
if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
|
||||
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
|
||||
CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"
|
||||
|
||||
@ -5,15 +5,7 @@ set -ex
|
||||
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||
mkdir tmp_cusparselt && cd tmp_cusparselt
|
||||
|
||||
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then
|
||||
arch_path='sbsa'
|
||||
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
||||
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
||||
arch_path='x86_64'
|
||||
fi
|
||||
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
|
||||
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
|
||||
arch_path='sbsa'
|
||||
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
||||
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
||||
@ -21,11 +13,17 @@ elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
|
||||
fi
|
||||
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
|
||||
elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
|
||||
arch_path='sbsa'
|
||||
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
||||
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
||||
arch_path='x86_64'
|
||||
fi
|
||||
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
|
||||
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
|
||||
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
|
||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
|
||||
else
|
||||
echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}"
|
||||
fi
|
||||
|
||||
tar xf ${CUSPARSELT_NAME}.tar.xz
|
||||
|
||||
38
.ci/docker/common/install_db.sh
Executable file
38
.ci/docker/common/install_db.sh
Executable file
@ -0,0 +1,38 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
install_ubuntu() {
|
||||
apt-get update
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
}
|
||||
|
||||
install_centos() {
|
||||
# Need EPEL for many packages we depend on.
|
||||
# See http://fedoraproject.org/wiki/EPEL
|
||||
yum --enablerepo=extras install -y epel-release
|
||||
|
||||
# Cleanup
|
||||
yum clean all
|
||||
rm -rf /var/cache/yum
|
||||
rm -rf /var/lib/yum/yumdb
|
||||
rm -rf /var/lib/yum/history
|
||||
}
|
||||
|
||||
# Install base packages depending on the base OS
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
ubuntu)
|
||||
install_ubuntu
|
||||
;;
|
||||
centos)
|
||||
install_centos
|
||||
;;
|
||||
*)
|
||||
echo "Unable to determine OS..."
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
@ -37,22 +37,20 @@ install_conda_dependencies() {
|
||||
|
||||
install_pip_dependencies() {
|
||||
pushd executorch
|
||||
as_jenkins bash install_executorch.sh
|
||||
|
||||
# A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
|
||||
# numba and scipy version used in PyTorch CI
|
||||
conda_run pip uninstall -y numba scipy
|
||||
|
||||
as_jenkins bash install_requirements.sh --pybind xnnpack
|
||||
popd
|
||||
}
|
||||
|
||||
setup_executorch() {
|
||||
pushd executorch
|
||||
# Setup swiftshader and Vulkan SDK which are required to build the Vulkan delegate
|
||||
as_jenkins bash .ci/scripts/setup-vulkan-linux-deps.sh
|
||||
|
||||
export PYTHON_EXECUTABLE=python
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
export EXECUTORCH_BUILD_PYBIND=ON
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
|
||||
as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
|
||||
as_jenkins .ci/scripts/setup-linux.sh cmake || true
|
||||
popd
|
||||
}
|
||||
|
||||
|
||||
@ -35,9 +35,7 @@ git clone https://github.com/halide/Halide.git
|
||||
pushd Halide
|
||||
git checkout ${COMMIT} && git submodule update --init --recursive
|
||||
pip_install -r requirements.txt
|
||||
# NOTE: pybind has a requirement for cmake > 3.5 so set the minimum cmake version here with a flag
|
||||
# Context: https://github.com/pytorch/pytorch/issues/150420
|
||||
cmake -G Ninja -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release -S . -B build
|
||||
cmake -G Ninja -DCMAKE_BUILD_TYPE=Release -S . -B build
|
||||
cmake --build build
|
||||
test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3
|
||||
cmake --install build --prefix ${CONDA_PREFIX}
|
||||
|
||||
@ -7,20 +7,14 @@ source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
function install_huggingface() {
|
||||
local version
|
||||
commit=$(get_pinned_commit huggingface)
|
||||
pip_install pandas==2.0.3
|
||||
pip_install "git+https://github.com/huggingface/transformers@${commit}"
|
||||
}
|
||||
|
||||
function install_timm() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit timm)
|
||||
|
||||
# TODO (huydhn): There is no torchvision release on 3.13 when I write this, so
|
||||
# I'm using nightly here instead. We just need to package to be able to install
|
||||
# TIMM. Removing this once vision has a release on 3.13
|
||||
if [[ "${ANACONDA_PYTHON_VERSION}" == "3.13" ]]; then
|
||||
pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cu124
|
||||
fi
|
||||
|
||||
pip_install pandas==2.0.3
|
||||
pip_install "git+https://github.com/huggingface/pytorch-image-models@${commit}"
|
||||
# Clean up
|
||||
conda_run pip uninstall -y cmake torch torchvision triton
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
|
||||
set -ex
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
if [ -n "${UBUNTU_VERSION}" ]; then
|
||||
apt update
|
||||
apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
|
||||
@ -13,8 +15,8 @@ chown -R jenkins pytorch
|
||||
|
||||
pushd pytorch
|
||||
# Install all linter dependencies
|
||||
pip install -r requirements.txt
|
||||
lintrunner init
|
||||
pip_install -r requirements.txt
|
||||
conda_run lintrunner init
|
||||
|
||||
# Cache .lintbin directory as part of the Docker image
|
||||
cp -r .lintbin /tmp
|
||||
|
||||
@ -3,6 +3,8 @@
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
MAGMA_VERSION="2.5.2"
|
||||
|
||||
function do_install() {
|
||||
cuda_version=$1
|
||||
cuda_version_nodot=${1/./}
|
||||
@ -15,7 +17,7 @@ function do_install() {
|
||||
set -x
|
||||
tmp_dir=$(mktemp -d)
|
||||
pushd ${tmp_dir}
|
||||
curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
|
||||
curl -OLs https://anaconda.org/pytorch/magma-cuda${cuda_version_nodot}/${MAGMA_VERSION}/download/linux-64/${magma_archive}
|
||||
tar -xvf "${magma_archive}"
|
||||
mkdir -p "${cuda_dir}/magma"
|
||||
mv include "${cuda_dir}/magma/include"
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Script that replaces the magma install from a conda package
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
function do_install() {
|
||||
cuda_version_nodot=${1/./}
|
||||
anaconda_python_version=$2
|
||||
|
||||
MAGMA_VERSION="2.6.1"
|
||||
magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||
|
||||
anaconda_dir="/opt/conda/envs/py_${anaconda_python_version}"
|
||||
(
|
||||
set -x
|
||||
tmp_dir=$(mktemp -d)
|
||||
pushd ${tmp_dir}
|
||||
curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
|
||||
tar -xvf "${magma_archive}"
|
||||
mv include/* "${anaconda_dir}/include/"
|
||||
mv lib/* "${anaconda_dir}/lib"
|
||||
popd
|
||||
)
|
||||
}
|
||||
|
||||
do_install $1 $2
|
||||
@ -16,7 +16,7 @@ case "$ID" in
|
||||
ubuntu)
|
||||
IS_UBUNTU=1
|
||||
;;
|
||||
centos|almalinux)
|
||||
centos)
|
||||
IS_UBUNTU=0
|
||||
;;
|
||||
*)
|
||||
@ -43,6 +43,12 @@ else
|
||||
fi
|
||||
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
|
||||
|
||||
# Install custom MIOpen + COMgr for ROCm >= 4.0.1
|
||||
if [[ $ROCM_INT -lt 40001 ]]; then
|
||||
echo "ROCm version < 4.0.1; will not install custom MIOpen"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Function to retry functions that sometimes timeout or have flaky failures
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
@ -60,27 +66,55 @@ else
|
||||
ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
|
||||
fi
|
||||
|
||||
# MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues
|
||||
MIOPEN_CMAKE_COMMON_FLAGS="
|
||||
-DMIOPEN_USE_COMGR=ON
|
||||
-DMIOPEN_BUILD_DRIVER=OFF
|
||||
"
|
||||
if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
|
||||
MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
|
||||
else
|
||||
echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source"
|
||||
# Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
|
||||
if [[ $ROCM_INT -ge 60300 ]]; then
|
||||
echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
|
||||
exit 0
|
||||
elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
|
||||
MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
|
||||
elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
|
||||
echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
|
||||
exit 0
|
||||
elif [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then
|
||||
echo "ROCm 6.0 MIOpen does not need any patches, do not build from source"
|
||||
exit 0
|
||||
elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then
|
||||
echo "ROCm 5.7 MIOpen does not need any patches, do not build from source"
|
||||
exit 0
|
||||
elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.6-staging"
|
||||
elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
|
||||
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
|
||||
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
|
||||
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then
|
||||
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.3-staging"
|
||||
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50300 ]]; then
|
||||
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.2-staging"
|
||||
elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
|
||||
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.1-staging"
|
||||
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
|
||||
MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
|
||||
MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
|
||||
else
|
||||
echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
if [[ ${IS_UBUNTU} == 1 ]]; then
|
||||
apt-get remove -y miopen-hip
|
||||
else
|
||||
# Workaround since almalinux manylinux image already has this and cget doesn't like that
|
||||
rm -rf /usr/local/lib/pkgconfig/sqlite3.pc
|
||||
|
||||
# Versioned package name needs regex match
|
||||
# Use --noautoremove to prevent other rocm packages from being uninstalled
|
||||
yum remove -y miopen-hip* --noautoremove
|
||||
yum remove -y miopen-hip
|
||||
fi
|
||||
|
||||
git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
|
||||
@ -88,7 +122,16 @@ pushd MIOpen
|
||||
# remove .git to save disk space since CI runner was running out
|
||||
rm -rf .git
|
||||
# Don't build CK to save docker build time
|
||||
sed -i '/composable_kernel/d' requirements.txt
|
||||
if [[ $ROCM_INT -ge 60200 ]]; then
|
||||
sed -i '/composable_kernel/d' requirements.txt
|
||||
fi
|
||||
# Don't build MLIR to save docker build time
|
||||
# since we are disabling MLIR backend for MIOpen anyway
|
||||
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
|
||||
sed -i '/rocMLIR/d' requirements.txt
|
||||
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then
|
||||
sed -i '/llvm-project-mlir/d' requirements.txt
|
||||
fi
|
||||
## MIOpen minimum requirements
|
||||
cmake -P install_deps.cmake --minimum
|
||||
|
||||
@ -110,7 +153,7 @@ cd build
|
||||
PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
|
||||
${MIOPEN_CMAKE_COMMON_FLAGS} \
|
||||
${MIOPEN_CMAKE_DB_FLAGS} \
|
||||
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}"
|
||||
-DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
|
||||
make MIOpen -j $(nproc)
|
||||
|
||||
# Build MIOpen package
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
NCCL_VERSION=""
|
||||
if [[ ${CUDA_VERSION:0:2} == "11" ]]; then
|
||||
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
|
||||
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
|
||||
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
|
||||
else
|
||||
echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -n "${NCCL_VERSION}" ]]; then
|
||||
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
|
||||
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
|
||||
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
|
||||
pushd nccl
|
||||
make -j src.build
|
||||
cp -a build/include/* /usr/local/cuda/include/
|
||||
cp -a build/lib/* /usr/local/cuda/lib64/
|
||||
popd
|
||||
rm -rf nccl
|
||||
ldconfig
|
||||
fi
|
||||
@ -4,15 +4,10 @@ set -ex
|
||||
|
||||
[ -n "$NINJA_VERSION" ]
|
||||
|
||||
arch=$(uname -m)
|
||||
if [ "$arch" == "aarch64" ]; then
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux-aarch64.zip"
|
||||
else
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
|
||||
fi
|
||||
url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
|
||||
|
||||
pushd /tmp
|
||||
wget --no-verbose --output-document=ninja-linux.zip "$url"
|
||||
unzip ninja-linux.zip -d /usr/local/bin
|
||||
rm -f ninja-linux.zip
|
||||
popd
|
||||
popd
|
||||
|
||||
@ -31,15 +31,15 @@ pip_install \
|
||||
pip_install coloredlogs packaging
|
||||
|
||||
pip_install onnxruntime==1.18.1
|
||||
pip_install onnx==1.17.0
|
||||
pip_install onnxscript==0.2.2 --no-deps
|
||||
pip_install onnx==1.16.2
|
||||
pip_install onnxscript==0.1.0.dev20241009 --no-deps
|
||||
# required by onnxscript
|
||||
pip_install ml_dtypes
|
||||
|
||||
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
||||
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
||||
IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
|
||||
as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
|
||||
as_jenkins echo 'import transformers; transformers.AutoModel.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoTokenizer.from_pretrained("sshleifer/tiny-gpt2"); transformers.AutoModelForSpeechSeq2Seq.from_pretrained("openai/whisper-large-v3");' > "${IMPORT_SCRIPT_FILENAME}"
|
||||
|
||||
# Need a PyTorch version for transformers to work
|
||||
pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
set -ex
|
||||
|
||||
cd /
|
||||
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.29 --depth 1 --shallow-submodules
|
||||
git clone https://github.com/OpenMathLib/OpenBLAS.git -b v0.3.25 --depth 1 --shallow-submodules
|
||||
|
||||
|
||||
OPENBLAS_BUILD_FLAGS="
|
||||
|
||||
@ -1,18 +0,0 @@
|
||||
#!/bin/bash
|
||||
set -ex
|
||||
|
||||
apt-get update
|
||||
# Use deadsnakes in case we need an older python version
|
||||
sudo add-apt-repository ppa:deadsnakes/ppa
|
||||
apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv
|
||||
|
||||
# Use a venv because uv and some other package managers don't support --user install
|
||||
ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
|
||||
python -m venv /var/lib/jenkins/ci_env
|
||||
source /var/lib/jenkins/ci_env/bin/activate
|
||||
|
||||
python -mpip install --upgrade pip
|
||||
python -mpip install -r /opt/requirements-ci.txt
|
||||
if [ -n "${PIP_CMAKE}" ]; then
|
||||
python -mpip install cmake==3.31.6
|
||||
fi
|
||||
@ -8,6 +8,10 @@ ver() {
|
||||
|
||||
install_ubuntu() {
|
||||
apt-get update
|
||||
if [[ $UBUNTU_VERSION == 18.04 ]]; then
|
||||
# gpg-agent is not available by default on 18.04
|
||||
apt-get install -y --no-install-recommends gpg-agent
|
||||
fi
|
||||
if [[ $UBUNTU_VERSION == 20.04 ]]; then
|
||||
# gpg-agent is not available by default on 20.04
|
||||
apt-get install -y --no-install-recommends gpg-agent
|
||||
@ -19,13 +23,6 @@ install_ubuntu() {
|
||||
apt-get install -y libc++1
|
||||
apt-get install -y libc++abi1
|
||||
|
||||
# Make sure rocm packages from repo.radeon.com have highest priority
|
||||
cat << EOF > /etc/apt/preferences.d/rocm-pin-600
|
||||
Package: *
|
||||
Pin: release o=repo.radeon.com
|
||||
Pin-Priority: 600
|
||||
EOF
|
||||
|
||||
# Add amdgpu repository
|
||||
UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
|
||||
echo "deb [arch=amd64] https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
|
||||
@ -65,22 +62,6 @@ EOF
|
||||
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
||||
done
|
||||
|
||||
# ROCm 6.3 had a regression where initializing static code objects had significant overhead
|
||||
if [[ $(ver $ROCM_VERSION) -eq $(ver 6.3) ]]; then
|
||||
# clr build needs CppHeaderParser but can only find it using conda's python
|
||||
/opt/conda/bin/python -m pip install CppHeaderParser
|
||||
git clone https://github.com/ROCm/HIP -b rocm-6.3.x
|
||||
HIP_COMMON_DIR=$(readlink -f HIP)
|
||||
git clone https://github.com/jeffdaily/clr -b release/rocm-rel-6.3-statco-hotfix
|
||||
mkdir -p clr/build
|
||||
pushd clr/build
|
||||
cmake .. -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
|
||||
make -j
|
||||
cp hipamd/lib/libamdhip64.so.6.3.* /opt/rocm/lib/libamdhip64.so.6.3.*
|
||||
popd
|
||||
rm -rf HIP clr
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
|
||||
@ -12,7 +12,7 @@ case "$ID" in
|
||||
apt-get install -y libpciaccess-dev pkg-config
|
||||
apt-get clean
|
||||
;;
|
||||
centos|almalinux)
|
||||
centos)
|
||||
yum install -y libpciaccess-devel pkgconfig
|
||||
;;
|
||||
*)
|
||||
@ -115,7 +115,7 @@ index a5007ffc..13fa07fc 100644
|
||||
if (!fp) {
|
||||
- fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
|
||||
- strerror(errno));
|
||||
+ //fprintf(stderr, "amdgpu.ids: No such file or directory\n");
|
||||
+ fprintf(stderr, "amdgpu.ids: No such file or directory\n");
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
@ -1,32 +1,38 @@
|
||||
#!/usr/bin/env bash
|
||||
# Script used only in CD pipeline
|
||||
#!/bin/bash
|
||||
# Script used in CI and CD pipeline
|
||||
|
||||
set -eou pipefail
|
||||
set -ex
|
||||
|
||||
function do_install() {
|
||||
rocm_version=$1
|
||||
rocm_version_nodot=${1//./}
|
||||
|
||||
# Version 2.7.2 + ROCm related updates
|
||||
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
|
||||
magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
|
||||
MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
|
||||
|
||||
rocm_dir="/opt/rocm"
|
||||
(
|
||||
set -x
|
||||
tmp_dir=$(mktemp -d)
|
||||
pushd ${tmp_dir}
|
||||
curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
|
||||
if tar -xvf "${magma_archive}"
|
||||
then
|
||||
mkdir -p "${rocm_dir}/magma"
|
||||
mv include "${rocm_dir}/magma/include"
|
||||
mv lib "${rocm_dir}/magma/lib"
|
||||
else
|
||||
echo "${magma_archive} not found, skipping magma install"
|
||||
fi
|
||||
popd
|
||||
)
|
||||
}
|
||||
# "install" hipMAGMA into /opt/rocm/magma by copying after build
|
||||
git clone https://bitbucket.org/icl/magma.git
|
||||
pushd magma
|
||||
|
||||
do_install $1
|
||||
# Version 2.7.2 + ROCm related updates
|
||||
git checkout a1625ff4d9bc362906bd01f805dbbe12612953f6
|
||||
|
||||
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
|
||||
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
|
||||
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
|
||||
echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
|
||||
fi
|
||||
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
|
||||
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
|
||||
export PATH="${PATH}:/opt/rocm/bin"
|
||||
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
||||
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
|
||||
else
|
||||
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
|
||||
fi
|
||||
for arch in $amdgpu_targets; do
|
||||
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
|
||||
done
|
||||
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
|
||||
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
|
||||
make -f make.gen.hipMAGMA -j $(nproc)
|
||||
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
|
||||
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
|
||||
popd
|
||||
mv magma /opt/rocm
|
||||
|
||||
24
.ci/docker/common/install_swiftshader.sh
Executable file
24
.ci/docker/common/install_swiftshader.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "${SWIFTSHADER}" ]
|
||||
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
_https_amazon_aws=https://ossci-android.s3.amazonaws.com
|
||||
|
||||
# SwiftShader
|
||||
_swiftshader_dir=/var/lib/jenkins/swiftshader
|
||||
_swiftshader_file_targz=swiftshader-abe07b943-prebuilt.tar.gz
|
||||
mkdir -p $_swiftshader_dir
|
||||
_tmp_swiftshader_targz="/tmp/${_swiftshader_file_targz}"
|
||||
|
||||
curl --silent --show-error --location --fail --retry 3 \
|
||||
--output "${_tmp_swiftshader_targz}" "$_https_amazon_aws/${_swiftshader_file_targz}"
|
||||
|
||||
tar -C "${_swiftshader_dir}" -xzf "${_tmp_swiftshader_targz}"
|
||||
|
||||
export VK_ICD_FILENAMES="${_swiftshader_dir}/build/Linux/vk_swiftshader_icd.json"
|
||||
@ -2,12 +2,6 @@
|
||||
|
||||
set -ex
|
||||
|
||||
mkdir -p /opt/triton
|
||||
if [ -z "${TRITON}" ] && [ -z "${TRITON_CPU}" ]; then
|
||||
echo "TRITON and TRITON_CPU are not set. Exiting..."
|
||||
exit 0
|
||||
fi
|
||||
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
get_conda_version() {
|
||||
@ -58,7 +52,6 @@ cd triton
|
||||
as_jenkins git checkout ${TRITON_PINNED_COMMIT}
|
||||
as_jenkins git submodule update --init --recursive
|
||||
cd python
|
||||
pip_install pybind11==2.13.6
|
||||
|
||||
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
|
||||
as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
|
||||
@ -67,22 +60,17 @@ if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}"
|
||||
# Triton needs at least gcc-9 to build
|
||||
apt-get install -y g++-9
|
||||
|
||||
CXX=g++-9 conda_run python setup.py bdist_wheel
|
||||
CXX=g++-9 pip_install -e .
|
||||
elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
|
||||
# Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
|
||||
add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
apt-get install -y g++-9
|
||||
|
||||
CXX=g++-9 conda_run python setup.py bdist_wheel
|
||||
CXX=g++-9 pip_install -e .
|
||||
else
|
||||
conda_run python setup.py bdist_wheel
|
||||
pip_install -e .
|
||||
fi
|
||||
|
||||
# Copy the wheel to /opt for multi stage docker builds
|
||||
cp dist/*.whl /opt/triton
|
||||
# Install the wheel for docker builds that don't use multi stage
|
||||
pip_install dist/*.whl
|
||||
|
||||
if [ -n "${CONDA_CMAKE}" ]; then
|
||||
# TODO: This is to make sure that the same cmake and numpy version from install conda
|
||||
# script is used. Without this step, the newer cmake version (3.25.2) downloaded by
|
||||
|
||||
@ -8,12 +8,6 @@ else
|
||||
with_cuda=no
|
||||
fi
|
||||
|
||||
if [[ -d "/opt/rocm" ]]; then
|
||||
with_rocm=/opt/rocm
|
||||
else
|
||||
with_rocm=no
|
||||
fi
|
||||
|
||||
function install_ucx() {
|
||||
set -ex
|
||||
git clone --recursive https://github.com/openucx/ucx.git
|
||||
@ -25,7 +19,6 @@ function install_ucx() {
|
||||
./configure --prefix=$UCX_HOME \
|
||||
--enable-mt \
|
||||
--with-cuda=$with_cuda \
|
||||
--with-rocm=$with_rocm \
|
||||
--enable-profiling \
|
||||
--enable-stats
|
||||
time make -j
|
||||
@ -43,29 +36,12 @@ function install_ucc() {
|
||||
git submodule update --init --recursive
|
||||
|
||||
./autogen.sh
|
||||
|
||||
# We only run distributed tests on Tesla M60 and A10G
|
||||
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
|
||||
|
||||
if [[ -n "$ROCM_VERSION" ]]; then
|
||||
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
||||
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
|
||||
else
|
||||
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
|
||||
fi
|
||||
for arch in $amdgpu_targets; do
|
||||
HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
|
||||
done
|
||||
else
|
||||
HIP_OFFLOAD="all-arch-no-native"
|
||||
fi
|
||||
|
||||
./configure --prefix=$UCC_HOME \
|
||||
--with-ucx=$UCX_HOME \
|
||||
--with-cuda=$with_cuda \
|
||||
--with-nvcc-gencode="${NVCC_GENCODE}" \
|
||||
--with-rocm=$with_rocm \
|
||||
--with-rocm-arch="${HIP_OFFLOAD}"
|
||||
--with-nvcc-gencode="${NVCC_GENCODE}"
|
||||
time make -j
|
||||
sudo make install
|
||||
|
||||
|
||||
24
.ci/docker/common/install_vulkan_sdk.sh
Executable file
24
.ci/docker/common/install_vulkan_sdk.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex
|
||||
|
||||
[ -n "${VULKAN_SDK_VERSION}" ]
|
||||
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
_vulkansdk_dir=/var/lib/jenkins/vulkansdk
|
||||
_tmp_vulkansdk_targz=/tmp/vulkansdk.tar.gz
|
||||
|
||||
curl \
|
||||
--silent \
|
||||
--show-error \
|
||||
--location \
|
||||
--fail \
|
||||
--retry 3 \
|
||||
--output "${_tmp_vulkansdk_targz}" "https://ossci-android.s3.amazonaws.com/vulkansdk-linux-x86_64-${VULKAN_SDK_VERSION}.tar.gz"
|
||||
|
||||
mkdir -p "${_vulkansdk_dir}"
|
||||
tar -C "${_vulkansdk_dir}" -xzf "${_tmp_vulkansdk_targz}" --strip-components 1
|
||||
rm -rf "${_tmp_vulkansdk_targz}"
|
||||
@ -49,8 +49,6 @@ RUN bash ./install_mkl.sh && rm install_mkl.sh
|
||||
FROM cpu as cuda
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
ADD ./common/install_magma.sh install_magma.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
ENV CUDA_HOME /usr/local/cuda
|
||||
|
||||
FROM cuda as cuda11.8
|
||||
@ -58,6 +56,11 @@ RUN bash ./install_cuda.sh 11.8
|
||||
RUN bash ./install_magma.sh 11.8
|
||||
RUN ln -sf /usr/local/cuda-11.8 /usr/local/cuda
|
||||
|
||||
FROM cuda as cuda12.1
|
||||
RUN bash ./install_cuda.sh 12.1
|
||||
RUN bash ./install_magma.sh 12.1
|
||||
RUN ln -sf /usr/local/cuda-12.1 /usr/local/cuda
|
||||
|
||||
FROM cuda as cuda12.4
|
||||
RUN bash ./install_cuda.sh 12.4
|
||||
RUN bash ./install_magma.sh 12.4
|
||||
@ -68,13 +71,7 @@ RUN bash ./install_cuda.sh 12.6
|
||||
RUN bash ./install_magma.sh 12.6
|
||||
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
|
||||
|
||||
FROM cuda as cuda12.8
|
||||
RUN bash ./install_cuda.sh 12.8
|
||||
RUN bash ./install_magma.sh 12.8
|
||||
RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda
|
||||
|
||||
FROM cpu as rocm
|
||||
ARG ROCM_VERSION
|
||||
ARG PYTORCH_ROCM_ARCH
|
||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||
ENV MKLROOT /opt/intel
|
||||
@ -89,11 +86,18 @@ ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
# gfortran and python needed for building magma from source for ROCm
|
||||
RUN apt-get update -y && \
|
||||
apt-get install gfortran -y && \
|
||||
apt-get install python3 python-is-python3 -y && \
|
||||
apt-get install python -y && \
|
||||
apt-get clean
|
||||
|
||||
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
|
||||
|
||||
# Install AOTriton
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./aotriton_version.txt aotriton_version.txt
|
||||
COPY ./common/install_aotriton.sh install_aotriton.sh
|
||||
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
|
||||
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
|
||||
|
||||
FROM ${BASE_TARGET} as final
|
||||
COPY --from=openssl /opt/openssl /opt/openssl
|
||||
|
||||
@ -1,63 +1,93 @@
|
||||
#!/usr/bin/env bash
|
||||
# Script used only in CD pipeline
|
||||
|
||||
set -eoux pipefail
|
||||
set -eou pipefail
|
||||
|
||||
image="$1"
|
||||
shift
|
||||
|
||||
if [ -z "${image}" ]; then
|
||||
echo "Usage: $0 IMAGENAME:ARCHTAG"
|
||||
echo "Usage: $0 IMAGE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
DOCKER_IMAGE="pytorch/${image}"
|
||||
|
||||
TOPDIR=$(git rev-parse --show-toplevel)
|
||||
|
||||
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
|
||||
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||
|
||||
WITH_PUSH=${WITH_PUSH:-}
|
||||
|
||||
DOCKER=${DOCKER:-docker}
|
||||
|
||||
# Go from imagename:tag to tag
|
||||
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
|
||||
|
||||
GPU_ARCH_VERSION=""
|
||||
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
|
||||
# extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
|
||||
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
|
||||
# extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
|
||||
fi
|
||||
|
||||
case ${DOCKER_TAG_PREFIX} in
|
||||
case ${GPU_ARCH_TYPE} in
|
||||
cpu)
|
||||
BASE_TARGET=cpu
|
||||
DOCKER_TAG=cpu
|
||||
GPU_IMAGE=ubuntu:20.04
|
||||
DOCKER_GPU_BUILD_ARG=""
|
||||
;;
|
||||
cuda*)
|
||||
cuda)
|
||||
BASE_TARGET=cuda${GPU_ARCH_VERSION}
|
||||
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=ubuntu:20.04
|
||||
DOCKER_GPU_BUILD_ARG=""
|
||||
;;
|
||||
rocm*)
|
||||
rocm)
|
||||
BASE_TARGET=rocm
|
||||
GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
|
||||
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=rocm/dev-ubuntu-20.04:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
|
||||
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
|
||||
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
|
||||
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
|
||||
else
|
||||
echo "ERROR: rocm regex failed"
|
||||
exit 1
|
||||
fi
|
||||
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
|
||||
PYTORCH_ROCM_ARCH+=";gfx942"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH}"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
|
||||
echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
DOCKER_BUILDKIT=1 ${DOCKER} build \
|
||||
--target final \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
-t "${tmp_tag}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
(
|
||||
set -x
|
||||
DOCKER_BUILDKIT=1 ${DOCKER} build \
|
||||
--target final \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--build-arg "BASE_TARGET=${BASE_TARGET}" \
|
||||
-t "${DOCKER_IMAGE}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
|
||||
)
|
||||
|
||||
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
|
||||
GIT_BRANCH_NAME=${GITHUB_REF##*/}
|
||||
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
|
||||
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
|
||||
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
|
||||
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
(
|
||||
set -x
|
||||
${DOCKER} push "${DOCKER_IMAGE}"
|
||||
if [[ -n ${GITHUB_REF} ]]; then
|
||||
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
|
||||
${DOCKER} tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
|
||||
${DOCKER} push "${DOCKER_IMAGE_BRANCH_TAG}"
|
||||
${DOCKER} push "${DOCKER_IMAGE_SHA_TAG}"
|
||||
fi
|
||||
)
|
||||
fi
|
||||
|
||||
@ -18,30 +18,27 @@ COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG PYTHON_VERSION
|
||||
ARG PIP_CMAKE
|
||||
# Put venv into the env vars so users don't need to activate it
|
||||
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
|
||||
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
|
||||
COPY requirements-ci.txt /opt/requirements-ci.txt
|
||||
COPY ./common/install_python.sh install_python.sh
|
||||
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Install cuda and cudnn
|
||||
ARG CUDA_VERSION
|
||||
COPY ./common/install_cuda.sh install_cuda.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
|
||||
ENV DESIRED_CUDA ${CUDA_VERSION}
|
||||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
||||
|
||||
# Note that Docker build forbids copying file outside the build context
|
||||
COPY ./common/install_linter.sh install_linter.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_linter.sh
|
||||
RUN rm install_linter.sh
|
||||
|
||||
RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
|
||||
RUN rm install_linter.sh common_utils.sh
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
||||
@ -15,18 +15,20 @@ COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG PYTHON_VERSION
|
||||
ARG PIP_CMAKE
|
||||
ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
|
||||
ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
|
||||
COPY requirements-ci.txt /opt/requirements-ci.txt
|
||||
COPY ./common/install_python.sh install_python.sh
|
||||
RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Note that Docker build forbids copying file outside the build context
|
||||
COPY ./common/install_linter.sh install_linter.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
RUN bash ./install_linter.sh
|
||||
RUN rm install_linter.sh
|
||||
RUN rm install_linter.sh common_utils.sh
|
||||
|
||||
USER jenkins
|
||||
CMD ["bash"]
|
||||
|
||||
@ -64,9 +64,7 @@ FROM base as cuda
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
# Install CUDA
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
|
||||
|
||||
FROM base as intel
|
||||
# MKL
|
||||
@ -197,6 +195,13 @@ RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
|
||||
# cmake3 is needed for the MIOpen build
|
||||
RUN ln -sf /usr/local/bin/cmake /usr/bin/cmake3
|
||||
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
|
||||
ADD ./common/install_miopen.sh install_miopen.sh
|
||||
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
|
||||
|
||||
# Install AOTriton
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./aotriton_version.txt aotriton_version.txt
|
||||
COPY ./common/install_aotriton.sh install_aotriton.sh
|
||||
RUN bash ./install_aotriton.sh /opt/rocm && rm install_aotriton.sh aotriton_version.txt
|
||||
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
|
||||
|
||||
153
.ci/docker/manywheel/Dockerfile_2014
Normal file
153
.ci/docker/manywheel/Dockerfile_2014
Normal file
@ -0,0 +1,153 @@
|
||||
# syntax = docker/dockerfile:experimental
|
||||
ARG ROCM_VERSION=3.7
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
ARG GPU_IMAGE=nvidia/cuda:${BASE_CUDA_VERSION}-devel-centos7
|
||||
FROM quay.io/pypa/manylinux2014_x86_64 as base
|
||||
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US.UTF-8
|
||||
|
||||
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
|
||||
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which perl zlib-devel
|
||||
RUN yum install -y yum-utils centos-release-scl sudo
|
||||
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
|
||||
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
|
||||
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# cmake
|
||||
RUN yum install -y cmake3 && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
FROM base as openssl
|
||||
# Install openssl (this must precede `build python` step)
|
||||
# (In order to have a proper SSL module, Python is compiled
|
||||
# against a recent openssl [see env vars above], which is linked
|
||||
# statically. We delete openssl afterwards.)
|
||||
ADD ./common/install_openssl.sh install_openssl.sh
|
||||
RUN bash ./install_openssl.sh && rm install_openssl.sh
|
||||
|
||||
|
||||
|
||||
# remove unncessary python versions
|
||||
RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
|
||||
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
|
||||
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
|
||||
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
|
||||
|
||||
FROM base as cuda
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
# Install CUDA
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
|
||||
|
||||
FROM base as intel
|
||||
# MKL
|
||||
ADD ./common/install_mkl.sh install_mkl.sh
|
||||
RUN bash ./install_mkl.sh && rm install_mkl.sh
|
||||
|
||||
FROM base as magma
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
# Install magma
|
||||
ADD ./common/install_magma.sh install_magma.sh
|
||||
RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
|
||||
|
||||
FROM base as jni
|
||||
# Install java jni header
|
||||
ADD ./common/install_jni.sh install_jni.sh
|
||||
ADD ./java/jni.h jni.h
|
||||
RUN bash ./install_jni.sh && rm install_jni.sh
|
||||
|
||||
FROM base as libpng
|
||||
# Install libpng
|
||||
ADD ./common/install_libpng.sh install_libpng.sh
|
||||
RUN bash ./install_libpng.sh && rm install_libpng.sh
|
||||
|
||||
FROM ${GPU_IMAGE} as common
|
||||
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
|
||||
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
|
||||
ENV LC_ALL en_US.UTF-8
|
||||
ENV LANG en_US.UTF-8
|
||||
ENV LANGUAGE en_US.UTF-8
|
||||
RUN yum install -y \
|
||||
aclocal \
|
||||
autoconf \
|
||||
automake \
|
||||
bison \
|
||||
bzip2 \
|
||||
curl \
|
||||
diffutils \
|
||||
file \
|
||||
git \
|
||||
make \
|
||||
patch \
|
||||
perl \
|
||||
unzip \
|
||||
util-linux \
|
||||
wget \
|
||||
which \
|
||||
xz \
|
||||
yasm
|
||||
RUN yum install -y \
|
||||
https://repo.ius.io/ius-release-el7.rpm \
|
||||
https://ossci-linux.s3.amazonaws.com/epel-release-7-14.noarch.rpm
|
||||
|
||||
RUN yum swap -y git git236-core
|
||||
# git236+ would refuse to run git commands in repos owned by other users
|
||||
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
|
||||
# Override this behaviour by treating every folder as safe
|
||||
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
|
||||
RUN git config --global --add safe.directory "*"
|
||||
|
||||
ENV SSL_CERT_FILE=/opt/_internal/certs.pem
|
||||
# Install LLVM version
|
||||
COPY --from=openssl /opt/openssl /opt/openssl
|
||||
COPY --from=base /opt/python /opt/python
|
||||
COPY --from=base /opt/_internal /opt/_internal
|
||||
COPY --from=base /usr/local/bin/auditwheel /usr/local/bin/auditwheel
|
||||
COPY --from=intel /opt/intel /opt/intel
|
||||
COPY --from=base /usr/local/bin/patchelf /usr/local/bin/patchelf
|
||||
COPY --from=libpng /usr/local/bin/png* /usr/local/bin/
|
||||
COPY --from=libpng /usr/local/bin/libpng* /usr/local/bin/
|
||||
COPY --from=libpng /usr/local/include/png* /usr/local/include/
|
||||
COPY --from=libpng /usr/local/include/libpng* /usr/local/include/
|
||||
COPY --from=libpng /usr/local/lib/libpng* /usr/local/lib/
|
||||
COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/lib/pkgconfig
|
||||
COPY --from=jni /usr/local/include/jni.h /usr/local/include/jni.h
|
||||
|
||||
FROM common as cpu_final
|
||||
ARG BASE_CUDA_VERSION=10.2
|
||||
RUN yum install -y yum-utils centos-release-scl
|
||||
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
|
||||
RUN yum install -y devtoolset-7-gcc devtoolset-7-gcc-c++ devtoolset-7-gcc-gfortran devtoolset-7-binutils
|
||||
ENV PATH=/opt/rh/devtoolset-7/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-7/root/usr/lib64:/opt/rh/devtoolset-7/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
# cmake
|
||||
RUN yum install -y cmake3 && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
|
||||
# ninja
|
||||
RUN yum install -y http://repo.okay.com.mx/centos/7/x86_64/release/okay-release-1-1.noarch.rpm
|
||||
RUN yum install -y ninja-build
|
||||
|
||||
FROM cpu_final as cuda_final
|
||||
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
|
||||
FROM common as rocm_final
|
||||
ARG ROCM_VERSION=3.7
|
||||
# Install ROCm
|
||||
ADD ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
|
||||
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
|
||||
# cmake3 is needed for the later MIOpen custom build, so that step is last.
|
||||
RUN yum install -y cmake3 && \
|
||||
rm -f /usr/bin/cmake && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
ADD ./common/install_miopen.sh install_miopen.sh
|
||||
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
|
||||
@ -1,4 +1,5 @@
|
||||
# syntax = docker/dockerfile:experimental
|
||||
ARG ROCM_VERSION=3.7
|
||||
ARG BASE_CUDA_VERSION=11.8
|
||||
ARG GPU_IMAGE=amd64/almalinux:8
|
||||
FROM quay.io/pypa/manylinux_2_28_x86_64 as base
|
||||
@ -36,9 +37,7 @@ FROM base as cuda
|
||||
ARG BASE_CUDA_VERSION=11.8
|
||||
# Install CUDA
|
||||
ADD ./common/install_cuda.sh install_cuda.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu*
|
||||
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh
|
||||
|
||||
FROM base as intel
|
||||
# MKL
|
||||
@ -131,10 +130,10 @@ RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
|
||||
done;
|
||||
|
||||
|
||||
# cmake-3.18.4 from pip; force in case cmake3 already exists
|
||||
# cmake-3.18.4 from pip
|
||||
RUN yum install -y python3-pip && \
|
||||
python3 -mpip install cmake==3.18.4 && \
|
||||
ln -sf /usr/local/bin/cmake /usr/bin/cmake3
|
||||
ln -s /usr/local/bin/cmake /usr/bin/cmake3
|
||||
|
||||
FROM cpu_final as cuda_final
|
||||
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
|
||||
@ -143,24 +142,17 @@ COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BAS
|
||||
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
|
||||
ENV PATH=/usr/local/cuda/bin:$PATH
|
||||
|
||||
FROM cpu_final as rocm_final
|
||||
ARG ROCM_VERSION=6.0
|
||||
ARG PYTORCH_ROCM_ARCH
|
||||
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||
ARG DEVTOOLSET_VERSION=11
|
||||
ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
|
||||
# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
|
||||
# below workaround helps avoid error
|
||||
ENV ROCM_PATH /opt/rocm
|
||||
# cmake-3.28.4 from pip to get enable_language(HIP)
|
||||
# and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker
|
||||
RUN python3 -m pip install --upgrade pip && \
|
||||
python3 -mpip install cmake==3.28.4
|
||||
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
|
||||
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
|
||||
ENV MKLROOT /opt/intel
|
||||
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
|
||||
|
||||
FROM common as rocm_final
|
||||
ARG ROCM_VERSION=3.7
|
||||
# Install ROCm
|
||||
ADD ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
|
||||
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
|
||||
# cmake3 is needed for the later MIOpen custom build, so that step is last.
|
||||
RUN yum install -y cmake3 && \
|
||||
rm -f /usr/bin/cmake && \
|
||||
ln -s /usr/bin/cmake3 /usr/bin/cmake
|
||||
ADD ./common/install_miopen.sh install_miopen.sh
|
||||
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
|
||||
|
||||
|
||||
@ -38,12 +38,6 @@ RUN yum install -y \
|
||||
sudo \
|
||||
gcc-toolset-${GCCTOOLSET_VERSION}-toolchain
|
||||
|
||||
# (optional) Install non-default Ninja version
|
||||
ARG NINJA_VERSION
|
||||
COPY ./common/install_ninja.sh install_ninja.sh
|
||||
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
|
||||
RUN rm install_ninja.sh
|
||||
|
||||
# Ensure the expected devtoolset is used
|
||||
ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
|
||||
|
||||
@ -61,7 +61,7 @@ RUN git config --global --add safe.directory "*"
|
||||
# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vender, or changed
|
||||
###############################################################################
|
||||
RUN cd ~/ \
|
||||
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-4ubuntu2_arm64.deb \
|
||||
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \
|
||||
&& ar x ~/libgfortran-10-dev.deb \
|
||||
&& tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \
|
||||
&& cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/
|
||||
|
||||
@ -67,9 +67,7 @@ FROM base as cuda
|
||||
ARG BASE_CUDA_VERSION
|
||||
# Install CUDA
|
||||
ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh install_nccl.sh ci_commit_pins/nccl-cu*
|
||||
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh
|
||||
|
||||
FROM base as magma
|
||||
ARG BASE_CUDA_VERSION
|
||||
|
||||
@ -42,7 +42,6 @@ RUN yum install -y \
|
||||
llvm-devel \
|
||||
libzstd-devel \
|
||||
python3.12-devel \
|
||||
python3.12-test \
|
||||
python3.12-setuptools \
|
||||
python3.12-pip \
|
||||
python3-virtualenv \
|
||||
@ -102,33 +101,24 @@ CMD ["/bin/bash"]
|
||||
|
||||
# install test dependencies:
|
||||
# - grpcio requires system openssl, bundled crypto fails to build
|
||||
# - ml_dtypes 0.4.0 requires some fixes provided in later commits to build
|
||||
RUN dnf install -y \
|
||||
protobuf-devel \
|
||||
protobuf-c-devel \
|
||||
protobuf-lite-devel \
|
||||
hdf5-devel \
|
||||
python3-h5py \
|
||||
git
|
||||
wget \
|
||||
patch
|
||||
|
||||
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
|
||||
|
||||
# cmake-3.28.0 from pip for onnxruntime
|
||||
RUN python3 -mpip install cmake==3.28.0
|
||||
|
||||
# build onnxruntime 1.21.0 from sources.
|
||||
# it is not possible to build it from sources using pip,
|
||||
# so just build it from upstream repository.
|
||||
# h5py is dependency of onnxruntime_training.
|
||||
# h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
|
||||
# install newest flatbuffers version first:
|
||||
# for some reason old version is getting pulled in otherwise.
|
||||
# packaging package is required for onnxruntime wheel build.
|
||||
RUN pip3 install flatbuffers && \
|
||||
pip3 install h5py==3.11.0 && \
|
||||
pip3 install packaging && \
|
||||
git clone https://github.com/microsoft/onnxruntime && \
|
||||
cd onnxruntime && git checkout v1.21.0 && \
|
||||
RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4
|
||||
RUN cd ~ && \
|
||||
git clone https://github.com/jax-ml/ml_dtypes && \
|
||||
cd ml_dtypes && \
|
||||
git checkout v0.4.0 && \
|
||||
git submodule update --init --recursive && \
|
||||
./build.sh --config Release --parallel 0 --enable_pybind --build_wheel --enable_training --enable_training_apis --enable_training_ops --skip_tests --allow_running_as_root && \
|
||||
pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
|
||||
cd .. && /bin/rm -rf ./onnxruntime
|
||||
wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
|
||||
wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
|
||||
patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
|
||||
patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
|
||||
python3 setup.py bdist_wheel && \
|
||||
pip3 install dist/*.whl && \
|
||||
rm -rf ml_dtypes
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#!/usr/bin/env bash
|
||||
# Script used only in CD pipeline
|
||||
|
||||
set -exou pipefail
|
||||
set -eou pipefail
|
||||
|
||||
TOPDIR=$(git rev-parse --show-toplevel)
|
||||
|
||||
@ -9,110 +9,155 @@ image="$1"
|
||||
shift
|
||||
|
||||
if [ -z "${image}" ]; then
|
||||
echo "Usage: $0 IMAGE:ARCHTAG"
|
||||
echo "Usage: $0 IMAGE"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Go from imagename:tag to tag
|
||||
DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
|
||||
DOCKER_IMAGE="pytorch/${image}"
|
||||
|
||||
GPU_ARCH_VERSION=""
|
||||
if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
|
||||
# extract cuda version from image name. e.g. manylinux2_28-builder:cuda12.8 returns 12.8
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
|
||||
elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
|
||||
# extract rocm version from image name. e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
|
||||
GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
|
||||
fi
|
||||
DOCKER_REGISTRY="${DOCKER_REGISTRY:-docker.io}"
|
||||
|
||||
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
|
||||
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||
MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
|
||||
DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
|
||||
WITH_PUSH=${WITH_PUSH:-}
|
||||
|
||||
case ${image} in
|
||||
manylinux2_28-builder:cpu)
|
||||
case ${GPU_ARCH_TYPE} in
|
||||
cpu)
|
||||
TARGET=cpu_final
|
||||
DOCKER_TAG=cpu
|
||||
GPU_IMAGE=centos:7
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
|
||||
;;
|
||||
cpu-manylinux_2_28)
|
||||
TARGET=cpu_final
|
||||
DOCKER_TAG=cpu
|
||||
GPU_IMAGE=amd64/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="2_28"
|
||||
;;
|
||||
manylinuxaarch64-builder:cpu-aarch64)
|
||||
cpu-aarch64)
|
||||
TARGET=final
|
||||
DOCKER_TAG=cpu-aarch64
|
||||
GPU_IMAGE=arm64v8/centos:7
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=10"
|
||||
MANY_LINUX_VERSION="aarch64"
|
||||
;;
|
||||
manylinux2_28_aarch64-builder:cpu-aarch64)
|
||||
cpu-aarch64-2_28)
|
||||
TARGET=final
|
||||
DOCKER_TAG=cpu-aarch64
|
||||
GPU_IMAGE=arm64v8/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11 --build-arg NINJA_VERSION=1.12.1"
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="2_28_aarch64"
|
||||
;;
|
||||
manylinuxcxx11-abi-builder:cpu-cxx11-abi)
|
||||
cpu-cxx11-abi)
|
||||
TARGET=final
|
||||
DOCKER_TAG=cpu-cxx11-abi
|
||||
GPU_IMAGE=""
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
|
||||
MANY_LINUX_VERSION="cxx11-abi"
|
||||
;;
|
||||
manylinuxs390x-builder:cpu-s390x)
|
||||
cpu-s390x)
|
||||
TARGET=final
|
||||
DOCKER_TAG=cpu-s390x
|
||||
GPU_IMAGE=s390x/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG=""
|
||||
MANY_LINUX_VERSION="s390x"
|
||||
;;
|
||||
manylinux2_28-builder:cuda*)
|
||||
cuda)
|
||||
TARGET=cuda_final
|
||||
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
|
||||
# Keep this up to date with the minimum version of CUDA we currently support
|
||||
GPU_IMAGE=centos:7
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=9"
|
||||
;;
|
||||
cuda-manylinux_2_28)
|
||||
TARGET=cuda_final
|
||||
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=amd64/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="2_28"
|
||||
;;
|
||||
manylinuxaarch64-builder:cuda*)
|
||||
cuda-aarch64)
|
||||
TARGET=cuda_final
|
||||
DOCKER_TAG=cuda${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=arm64v8/centos:7
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="aarch64"
|
||||
DOCKERFILE_SUFFIX="_cuda_aarch64"
|
||||
;;
|
||||
manylinux2_28-builder:rocm*)
|
||||
rocm)
|
||||
TARGET=rocm_final
|
||||
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
|
||||
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
|
||||
DEVTOOLSET_VERSION="9"
|
||||
MANY_LINUX_VERSION="2_28"
|
||||
DEVTOOLSET_VERSION="11"
|
||||
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
|
||||
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
|
||||
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
|
||||
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
|
||||
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
|
||||
else
|
||||
echo "ERROR: rocm regex failed"
|
||||
exit 1
|
||||
fi
|
||||
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
|
||||
PYTORCH_ROCM_ARCH+=";gfx942"
|
||||
fi
|
||||
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
|
||||
;;
|
||||
manylinux2_28-builder:xpu)
|
||||
xpu)
|
||||
TARGET=xpu_final
|
||||
DOCKER_TAG=xpu
|
||||
GPU_IMAGE=amd64/almalinux:8
|
||||
DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
|
||||
MANY_LINUX_VERSION="2_28"
|
||||
;;
|
||||
*)
|
||||
echo "ERROR: Unrecognized image name: ${image}"
|
||||
echo "ERROR: Unrecognized GPU_ARCH_TYPE: ${GPU_ARCH_TYPE}"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
|
||||
IMAGES=''
|
||||
|
||||
if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
|
||||
DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
|
||||
fi
|
||||
# Only activate this if in CI
|
||||
if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
|
||||
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
|
||||
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
|
||||
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
(
|
||||
set -x
|
||||
|
||||
if [ "$(uname -m)" != "s390x" ]; then
|
||||
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
|
||||
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
|
||||
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl restart docker
|
||||
fi
|
||||
|
||||
DOCKER_BUILDKIT=1 docker build \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--target "${TARGET}" \
|
||||
-t "${DOCKER_IMAGE}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
)
|
||||
|
||||
GITHUB_REF=${GITHUB_REF:-$(git symbolic-ref -q HEAD || git describe --tags --exact-match)}
|
||||
GIT_BRANCH_NAME=${GITHUB_REF##*/}
|
||||
GIT_COMMIT_SHA=${GITHUB_SHA:-$(git rev-parse HEAD)}
|
||||
DOCKER_IMAGE_BRANCH_TAG=${DOCKER_IMAGE}-${GIT_BRANCH_NAME}
|
||||
DOCKER_IMAGE_SHA_TAG=${DOCKER_IMAGE}-${GIT_COMMIT_SHA}
|
||||
|
||||
if [[ "${WITH_PUSH}" == true ]]; then
|
||||
(
|
||||
set -x
|
||||
docker push "${DOCKER_IMAGE}"
|
||||
if [[ -n ${GITHUB_REF} ]]; then
|
||||
docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_BRANCH_TAG}
|
||||
docker tag ${DOCKER_IMAGE} ${DOCKER_IMAGE_SHA_TAG}
|
||||
docker push "${DOCKER_IMAGE_BRANCH_TAG}"
|
||||
docker push "${DOCKER_IMAGE_SHA_TAG}"
|
||||
fi
|
||||
)
|
||||
fi
|
||||
|
||||
tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
|
||||
|
||||
DOCKER_BUILDKIT=1 docker build \
|
||||
${DOCKER_GPU_BUILD_ARG} \
|
||||
--build-arg "GPU_IMAGE=${GPU_IMAGE}" \
|
||||
--target "${TARGET}" \
|
||||
-t "${tmp_tag}" \
|
||||
$@ \
|
||||
-f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
|
||||
"${TOPDIR}/.ci/docker/"
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# Script used only in CD pipeline
|
||||
|
||||
OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/
|
||||
CURL_DOWNLOAD_URL=https://curl.se/download
|
||||
CURL_DOWNLOAD_URL=https://curl.askapache.com/download
|
||||
|
||||
AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf
|
||||
|
||||
|
||||
@ -30,10 +30,10 @@ dill==0.3.7
|
||||
#Pinned versions: 0.3.7
|
||||
#test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
|
||||
|
||||
expecttest==0.3.0
|
||||
expecttest==0.2.1
|
||||
#Description: method for writing tests where test framework auto populates
|
||||
# the expected output based on previous runs
|
||||
#Pinned versions: 0.3.0
|
||||
#Pinned versions: 0.2.1
|
||||
#test that import:
|
||||
|
||||
fbscribelogger==0.1.7
|
||||
@ -41,14 +41,11 @@ fbscribelogger==0.1.7
|
||||
#Pinned versions: 0.1.6
|
||||
#test that import:
|
||||
|
||||
flatbuffers==2.0 ; platform_machine != "s390x"
|
||||
flatbuffers==2.0
|
||||
#Description: cross platform serialization library
|
||||
#Pinned versions: 2.0
|
||||
#test that import:
|
||||
|
||||
flatbuffers ; platform_machine == "s390x"
|
||||
#Description: cross platform serialization library; Newer version is required on s390x for new python version
|
||||
|
||||
hypothesis==5.35.1
|
||||
# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
|
||||
#Description: advanced library for generating parametrized tests
|
||||
@ -93,10 +90,10 @@ librosa>=0.6.2 ; python_version < "3.11"
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
mypy==1.14.0
|
||||
mypy==1.11.2
|
||||
# Pin MyPy version because new errors are likely to appear with each release
|
||||
#Description: linter
|
||||
#Pinned versions: 1.14.0
|
||||
#Pinned versions: 1.10.0
|
||||
#test that import: test_typing.py, test_type_hints.py
|
||||
|
||||
networkx==2.8.8
|
||||
@ -105,10 +102,10 @@ networkx==2.8.8
|
||||
#Pinned versions: 2.8.8
|
||||
#test that import: functorch
|
||||
|
||||
ninja==1.11.1.3
|
||||
#Description: build system. Used in some tests. Used in build to generate build
|
||||
#time tracing information
|
||||
#Pinned versions: 1.11.1.3
|
||||
#ninja
|
||||
#Description: build system. Note that it install from
|
||||
#here breaks things so it is commented out
|
||||
#Pinned versions: 1.10.0.post1
|
||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||
|
||||
numba==0.49.0 ; python_version < "3.9"
|
||||
@ -135,9 +132,6 @@ numpy==1.22.4; python_version == "3.9" or python_version == "3.10"
|
||||
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
|
||||
numpy==2.1.2; python_version >= "3.13"
|
||||
|
||||
pandas==2.0.3; python_version < "3.13"
|
||||
pandas==2.2.3; python_version >= "3.13"
|
||||
|
||||
#onnxruntime
|
||||
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
|
||||
#Pinned versions: 1.9.0
|
||||
@ -161,7 +155,7 @@ optree==0.13.0
|
||||
#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
|
||||
#test_fake_tensor.py, test_mps.py
|
||||
|
||||
pillow==11.0.0
|
||||
pillow==10.3.0
|
||||
#Description: Python Imaging Library fork
|
||||
#Pinned versions: 10.3.0
|
||||
#test that import:
|
||||
@ -196,11 +190,6 @@ pytest-rerunfailures>=10.3
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
pytest-subtests==0.13.1
|
||||
#Description: plugin for subtest support
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
#pytest-benchmark
|
||||
#Description: fixture for benchmarking code
|
||||
#Pinned versions: 3.2.3
|
||||
@ -248,7 +237,7 @@ scikit-image==0.22.0 ; python_version >= "3.10"
|
||||
#test that import:
|
||||
|
||||
scipy==1.10.1 ; python_version <= "3.11"
|
||||
scipy==1.14.1 ; python_version >= "3.12"
|
||||
scipy==1.12.0 ; python_version == "3.12"
|
||||
# Pin SciPy because of failing distribution tests (see #60347)
|
||||
#Description: scientific python
|
||||
#Pinned versions: 1.10.1
|
||||
@ -283,9 +272,9 @@ unittest-xml-reporting<=3.2.0,>=2.0.0
|
||||
#test that import:
|
||||
|
||||
#lintrunner is supported on aarch64-linux only from 0.12.4 version
|
||||
lintrunner==0.12.7
|
||||
lintrunner==0.12.5
|
||||
#Description: all about linters!
|
||||
#Pinned versions: 0.12.7
|
||||
#Pinned versions: 0.12.5
|
||||
#test that import:
|
||||
|
||||
redis>=4.0.0
|
||||
@ -297,7 +286,7 @@ ghstack==0.8.0
|
||||
#Pinned versions: 0.8.0
|
||||
#test that import:
|
||||
|
||||
jinja2==3.1.6
|
||||
jinja2==3.1.4
|
||||
#Description: jinja2 template engine
|
||||
#Pinned versions: 3.1.4
|
||||
#test that import:
|
||||
@ -307,32 +296,31 @@ pytest-cpp==2.3.0
|
||||
#Pinned versions: 2.3.0
|
||||
#test that import:
|
||||
|
||||
z3-solver==4.12.6.0
|
||||
z3-solver==4.12.2.0
|
||||
#Description: The Z3 Theorem Prover Project
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
tensorboard==2.13.0 ; python_version < "3.13"
|
||||
tensorboard==2.18.0 ; python_version >= "3.13"
|
||||
tensorboard==2.13.0
|
||||
#Description: Also included in .ci/docker/requirements-docs.txt
|
||||
#Pinned versions:
|
||||
#test that import: test_tensorboard
|
||||
|
||||
pywavelets==1.4.1 ; python_version < "3.12"
|
||||
pywavelets==1.7.0 ; python_version >= "3.12"
|
||||
pywavelets==1.5.0 ; python_version >= "3.12"
|
||||
#Description: This is a requirement of scikit-image, we need to pin
|
||||
# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
|
||||
#Pinned versions: 1.4.1
|
||||
#test that import:
|
||||
|
||||
lxml==5.3.0
|
||||
lxml==5.0.0
|
||||
#Description: This is a requirement of unittest-xml-reporting
|
||||
|
||||
# Python-3.9 binaries
|
||||
|
||||
PyGithub==2.3.0
|
||||
|
||||
sympy==1.13.3
|
||||
sympy==1.13.1 ; python_version >= "3.9"
|
||||
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
@ -342,7 +330,7 @@ onnx==1.17.0
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
|
||||
onnxscript==0.2.2
|
||||
onnxscript==0.1.0.dev20240817
|
||||
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
||||
#Pinned versions:
|
||||
#test that import:
|
||||
@ -356,7 +344,7 @@ parameterized==0.8.1
|
||||
#Pinned versions: 1.24.0
|
||||
#test that import: test_sac_estimator.py
|
||||
|
||||
pwlf==2.2.1
|
||||
pwlf==2.2.1 ; python_version >= "3.8"
|
||||
#Description: required for testing torch/distributed/_tools/sac_estimator.py
|
||||
#Pinned versions: 2.2.1
|
||||
#test that import: test_sac_estimator.py
|
||||
@ -365,17 +353,12 @@ pwlf==2.2.1
|
||||
# To build PyTorch itself
|
||||
astunparse
|
||||
PyYAML
|
||||
pyzstd
|
||||
setuptools
|
||||
|
||||
ninja==1.11.1 ; platform_machine == "aarch64"
|
||||
scons==4.5.2 ; platform_machine == "aarch64"
|
||||
|
||||
pulp==2.9.0
|
||||
pulp==2.9.0 ; python_version >= "3.8"
|
||||
#Description: required for testing ilp formulaiton under torch/distributed/_tools
|
||||
#Pinned versions: 2.9.0
|
||||
#test that import: test_sac_ilp.py
|
||||
|
||||
dataclasses_json==0.6.7
|
||||
#Description: required for data pipeline and scripts under tools/stats
|
||||
#Pinned versions: 0.6.7
|
||||
#test that import:
|
||||
|
||||
@ -1,26 +1,20 @@
|
||||
sphinx==5.3.0
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 5.3.0
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@a98ffecb792d50df495be401becbf5c414421423#egg=pytorch_sphinx_theme2
|
||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme
|
||||
|
||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||
# but it doesn't seem to work and hangs around idly. The initial thought is probably
|
||||
# something related to Docker setup. We can investigate this later
|
||||
|
||||
sphinxcontrib.katex==0.8.6
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 0.8.6
|
||||
|
||||
sphinxext-opengraph==0.9.1
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 0.9.1
|
||||
|
||||
matplotlib==3.5.3
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 3.5.3
|
||||
|
||||
tensorboard==2.13.0 ; python_version < "3.13"
|
||||
tensorboard==2.18.0 ; python_version >= "3.13"
|
||||
tensorboard==2.13.0
|
||||
#Description: This is used to generate PyTorch docs
|
||||
#Pinned versions: 2.13.0
|
||||
|
||||
@ -51,6 +45,5 @@ myst-nb==0.17.2
|
||||
# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
|
||||
python-etcd==0.4.5
|
||||
sphinx-copybutton==0.5.0
|
||||
sphinx-design==0.4.0
|
||||
sphinxcontrib-mermaid==1.0.0
|
||||
sphinx-panels==0.4.1
|
||||
myst-parser==0.18.1
|
||||
|
||||
@ -1 +1 @@
|
||||
3.3.0
|
||||
3.1.0
|
||||
|
||||
@ -2,7 +2,7 @@ ARG UBUNTU_VERSION
|
||||
ARG CUDA_VERSION
|
||||
ARG IMAGE_NAME
|
||||
|
||||
FROM ${IMAGE_NAME} as base
|
||||
FROM ${IMAGE_NAME}
|
||||
|
||||
ARG UBUNTU_VERSION
|
||||
ARG CUDA_VERSION
|
||||
@ -30,8 +30,7 @@ ARG CONDA_CMAKE
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./common/install_magma_conda.sh install_magma_conda.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
|
||||
|
||||
# Install gcc
|
||||
ARG GCC_VERSION
|
||||
@ -50,6 +49,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
@ -74,8 +80,6 @@ RUN bash ./install_openssl.sh
|
||||
ENV OPENSSL_DIR /opt/openssl
|
||||
|
||||
ARG INDUCTOR_BENCHMARKS
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||
@ -90,20 +94,14 @@ RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
|
||||
RUN rm install_cmake.sh
|
||||
|
||||
ARG TRITON
|
||||
|
||||
FROM base as triton-builder
|
||||
# Install triton, this needs to be done before sccache because the latter will
|
||||
# try to reach out to S3, which docker build runners don't have access
|
||||
COPY ./common/install_triton.sh install_triton.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/triton.txt triton.txt
|
||||
COPY triton_version.txt triton_version.txt
|
||||
RUN bash ./install_triton.sh
|
||||
|
||||
FROM base as final
|
||||
COPY --from=triton-builder /opt/triton /opt/triton
|
||||
RUN if [ -n "${TRITON}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi
|
||||
RUN rm -rf /opt/triton
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
|
||||
|
||||
ARG HALIDE
|
||||
# Build and install halide
|
||||
@ -158,16 +156,6 @@ COPY ./common/install_cusparselt.sh install_cusparselt.sh
|
||||
RUN bash install_cusparselt.sh
|
||||
RUN rm install_cusparselt.sh
|
||||
|
||||
# Install NCCL
|
||||
ARG CUDA_VERSION
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash install_nccl.sh
|
||||
RUN rm install_nccl.sh /ci_commit_pins/nccl-cu*
|
||||
ENV USE_SYSTEM_NCCL=1
|
||||
ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
|
||||
ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
||||
|
||||
# Install CUDSS
|
||||
ARG CUDA_VERSION
|
||||
COPY ./common/install_cudss.sh install_cudss.sh
|
||||
|
||||
@ -14,20 +14,21 @@ ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
|
||||
COPY ./common/install_base.sh install_base.sh
|
||||
RUN bash ./install_base.sh && rm install_base.sh
|
||||
|
||||
# Install clang
|
||||
ARG LLVMDEV
|
||||
ARG CLANG_VERSION
|
||||
COPY ./common/install_clang.sh install_clang.sh
|
||||
RUN bash ./install_clang.sh && rm install_clang.sh
|
||||
|
||||
# Install user
|
||||
COPY ./common/install_user.sh install_user.sh
|
||||
RUN bash ./install_user.sh && rm install_user.sh
|
||||
|
||||
# Install katex
|
||||
ARG KATEX
|
||||
COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
|
||||
RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
|
||||
|
||||
# Install conda and other packages (e.g., numpy, pytest)
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ARG CONDA_CMAKE
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
|
||||
ARG CONDA_CMAKE
|
||||
COPY requirements-ci.txt /opt/conda/requirements-ci.txt
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
@ -38,11 +39,6 @@ ARG GCC_VERSION
|
||||
COPY ./common/install_gcc.sh install_gcc.sh
|
||||
RUN bash ./install_gcc.sh && rm install_gcc.sh
|
||||
|
||||
# Install clang
|
||||
ARG CLANG_VERSION
|
||||
COPY ./common/install_clang.sh install_clang.sh
|
||||
RUN bash ./install_clang.sh && rm install_clang.sh
|
||||
|
||||
# (optional) Install protobuf for ONNX
|
||||
ARG PROTOBUF
|
||||
COPY ./common/install_protobuf.sh install_protobuf.sh
|
||||
@ -50,6 +46,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
@ -63,7 +66,7 @@ COPY ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh
|
||||
RUN rm install_rocm.sh
|
||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||
RUN bash ./install_rocm_magma.sh
|
||||
RUN rm install_rocm_magma.sh
|
||||
ADD ./common/install_miopen.sh install_miopen.sh
|
||||
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
|
||||
@ -82,32 +85,6 @@ COPY ./common/install_amdsmi.sh install_amdsmi.sh
|
||||
RUN bash ./install_amdsmi.sh
|
||||
RUN rm install_amdsmi.sh
|
||||
|
||||
# (optional) Install UCC
|
||||
ARG UCX_COMMIT
|
||||
ARG UCC_COMMIT
|
||||
ENV UCX_COMMIT $UCX_COMMIT
|
||||
ENV UCC_COMMIT $UCC_COMMIT
|
||||
ENV UCX_HOME /usr
|
||||
ENV UCC_HOME /usr
|
||||
ADD ./common/install_ucc.sh install_ucc.sh
|
||||
RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
|
||||
RUN rm install_ucc.sh
|
||||
|
||||
COPY ./common/install_openssl.sh install_openssl.sh
|
||||
ENV OPENSSL_ROOT_DIR /opt/openssl
|
||||
RUN bash ./install_openssl.sh
|
||||
ENV OPENSSL_DIR /opt/openssl
|
||||
|
||||
ARG INDUCTOR_BENCHMARKS
|
||||
ARG ANACONDA_PYTHON_VERSION
|
||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||
COPY ci_commit_pins/timm.txt timm.txt
|
||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
@ -130,17 +107,18 @@ COPY triton_version.txt triton_version.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
|
||||
|
||||
# Install AOTriton
|
||||
COPY ./aotriton_version.txt aotriton_version.txt
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./common/install_aotriton.sh install_aotriton.sh
|
||||
RUN ["/bin/bash", "-c", "./install_aotriton.sh /opt/rocm && rm -rf install_aotriton.sh aotriton_version.txt common_utils.sh"]
|
||||
ENV AOTRITON_INSTALLED_PREFIX /opt/rocm/aotriton
|
||||
|
||||
# Install ccache/sccache (do this last, so we get priority in PATH)
|
||||
COPY ./common/install_cache.sh install_cache.sh
|
||||
ENV PATH /opt/cache/bin:$PATH
|
||||
RUN bash ./install_cache.sh && rm install_cache.sh
|
||||
|
||||
# Install Open MPI for ROCm
|
||||
COPY ./common/install_openmpi.sh install_openmpi.sh
|
||||
RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
|
||||
RUN rm install_openmpi.sh
|
||||
|
||||
# Include BUILD_ENVIRONMENT environment variable in image
|
||||
ARG BUILD_ENVIRONMENT
|
||||
ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
|
||||
|
||||
@ -77,6 +77,13 @@ COPY triton_version.txt triton_version.txt
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
ARG UBUNTU_VERSION
|
||||
|
||||
FROM ubuntu:${UBUNTU_VERSION} as base
|
||||
FROM ubuntu:${UBUNTU_VERSION}
|
||||
|
||||
ARG UBUNTU_VERSION
|
||||
|
||||
@ -36,8 +36,7 @@ ENV DOCS=$DOCS
|
||||
COPY requirements-ci.txt requirements-docs.txt /opt/conda/
|
||||
COPY ./common/install_conda.sh install_conda.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./common/install_magma_conda.sh install_magma_conda.sh
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
|
||||
RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
|
||||
RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi
|
||||
|
||||
# Install gcc
|
||||
@ -52,16 +51,9 @@ RUN bash ./install_lcov.sh && rm install_lcov.sh
|
||||
# Install cuda and cudnn
|
||||
ARG CUDA_VERSION
|
||||
COPY ./common/install_cuda.sh install_cuda.sh
|
||||
COPY ./common/install_nccl.sh install_nccl.sh
|
||||
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
|
||||
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
|
||||
ENV DESIRED_CUDA ${CUDA_VERSION}
|
||||
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
|
||||
# No effect if cuda not installed
|
||||
ENV USE_SYSTEM_NCCL=1
|
||||
ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
|
||||
ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
||||
|
||||
|
||||
# (optional) Install UCC
|
||||
ARG UCX_COMMIT
|
||||
@ -81,6 +73,13 @@ RUN if [ -n "${PROTOBUF}" ]; then bash ./install_protobuf.sh; fi
|
||||
RUN rm install_protobuf.sh
|
||||
ENV INSTALLED_PROTOBUF ${PROTOBUF}
|
||||
|
||||
# (optional) Install database packages like LMDB and LevelDB
|
||||
ARG DB
|
||||
COPY ./common/install_db.sh install_db.sh
|
||||
RUN if [ -n "${DB}" ]; then bash ./install_db.sh; fi
|
||||
RUN rm install_db.sh
|
||||
ENV INSTALLED_DB ${DB}
|
||||
|
||||
# (optional) Install vision packages like OpenCV
|
||||
ARG VISION
|
||||
COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
|
||||
@ -88,6 +87,18 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
|
||||
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
|
||||
ENV INSTALLED_VISION ${VISION}
|
||||
|
||||
# (optional) Install Vulkan SDK
|
||||
ARG VULKAN_SDK_VERSION
|
||||
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh
|
||||
RUN if [ -n "${VULKAN_SDK_VERSION}" ]; then bash ./install_vulkan_sdk.sh; fi
|
||||
RUN rm install_vulkan_sdk.sh
|
||||
|
||||
# (optional) Install swiftshader
|
||||
ARG SWIFTSHADER
|
||||
COPY ./common/install_swiftshader.sh install_swiftshader.sh
|
||||
RUN if [ -n "${SWIFTSHADER}" ]; then bash ./install_swiftshader.sh; fi
|
||||
RUN rm install_swiftshader.sh
|
||||
|
||||
# (optional) Install non-default CMake version
|
||||
ARG CMAKE_VERSION
|
||||
COPY ./common/install_cmake.sh install_cmake.sh
|
||||
@ -115,21 +126,20 @@ RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_d
|
||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||
|
||||
ARG TRITON
|
||||
ARG TRITON_CPU
|
||||
|
||||
# Create a separate stage for building Triton and Triton-CPU. install_triton
|
||||
# will check for the presence of env vars
|
||||
FROM base as triton-builder
|
||||
# Install triton, this needs to be done before sccache because the latter will
|
||||
# try to reach out to S3, which docker build runners don't have access
|
||||
COPY ./common/install_triton.sh install_triton.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/triton.txt triton.txt
|
||||
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
|
||||
RUN bash ./install_triton.sh
|
||||
RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton.txt
|
||||
|
||||
FROM base as final
|
||||
COPY --from=triton-builder /opt/triton /opt/triton
|
||||
RUN if [ -n "${TRITON}" ] || [ -n "${TRITON_CPU}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi
|
||||
RUN rm -rf /opt/triton
|
||||
ARG TRITON_CPU
|
||||
COPY ./common/install_triton.sh install_triton.sh
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
|
||||
RUN if [ -n "${TRITON_CPU}" ]; then bash ./install_triton.sh; fi
|
||||
RUN rm install_triton.sh common_utils.sh triton-cpu.txt
|
||||
|
||||
ARG EXECUTORCH
|
||||
# Build and install executorch
|
||||
|
||||
2
.ci/magma-rocm/.gitignore
vendored
2
.ci/magma-rocm/.gitignore
vendored
@ -1,2 +0,0 @@
|
||||
output/
|
||||
magma-rocm*/
|
||||
@ -1,41 +0,0 @@
|
||||
SHELL=/usr/bin/env bash
|
||||
|
||||
DOCKER_CMD ?= docker
|
||||
DESIRED_ROCM ?= 6.4
|
||||
DESIRED_ROCM_SHORT = $(subst .,,$(DESIRED_ROCM))
|
||||
PACKAGE_NAME = magma-rocm
|
||||
# inherit this from underlying docker image, do not pass this env var to docker
|
||||
#PYTORCH_ROCM_ARCH ?= gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201
|
||||
|
||||
DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
|
||||
-w /builder \
|
||||
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_ROCM_SHORT} \
|
||||
-e DESIRED_ROCM=${DESIRED_ROCM} \
|
||||
"pytorch/manylinux2_28-builder:rocm${DESIRED_ROCM}-main" \
|
||||
magma-rocm/build_magma.sh
|
||||
|
||||
.PHONY: all
|
||||
all: magma-rocm64
|
||||
all: magma-rocm63
|
||||
all: magma-rocm624
|
||||
|
||||
.PHONY:
|
||||
clean:
|
||||
$(RM) -r magma-*
|
||||
$(RM) -r output
|
||||
|
||||
.PHONY: magma-rocm64
|
||||
magma-rocm64: DESIRED_ROCM := 6.4
|
||||
magma-rocm64:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-rocm63
|
||||
magma-rocm63: DESIRED_ROCM := 6.3
|
||||
magma-rocm63:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-rocm624
|
||||
magma-rocm624: DESIRED_ROCM := 6.2.4
|
||||
magma-rocm624:
|
||||
$(DOCKER_RUN)
|
||||
@ -1,48 +0,0 @@
|
||||
# Magma ROCm
|
||||
|
||||
This folder contains the scripts and configurations to build libmagma.so, linked for various versions of ROCm.
|
||||
|
||||
## Building
|
||||
|
||||
Look in the `Makefile` for available targets to build. To build any target, for example `magma-rocm63`, run
|
||||
|
||||
```
|
||||
# Using `docker`
|
||||
make magma-rocm63
|
||||
|
||||
# Using `podman`
|
||||
DOCKER_CMD=podman make magma-rocm63
|
||||
```
|
||||
|
||||
This spawns a `pytorch/manylinux-rocm<version>` docker image, which has the required `devtoolset` and ROCm versions installed.
|
||||
Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which package the necessary files
|
||||
into a tarball, with the following structure:
|
||||
|
||||
```
|
||||
.
|
||||
├── include # header files
|
||||
├── lib # libmagma.so
|
||||
├── info
|
||||
│ ├── licenses # license file
|
||||
│ └── recipe # build script
|
||||
```
|
||||
|
||||
More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the ROCm version.
|
||||
Outputted binaries should be in the `output` folder.
|
||||
|
||||
|
||||
## Pushing
|
||||
|
||||
Packages can be uploaded to an S3 bucket using:
|
||||
|
||||
```
|
||||
aws s3 cp output/*/magma-cuda*.bz2 <bucket-with-path>
|
||||
```
|
||||
|
||||
If you do not have upload permissions, please ping @seemethere or @soumith to gain access
|
||||
|
||||
## New versions
|
||||
|
||||
New ROCm versions can be added by creating a new make target with the next desired version. For ROCm version N.n, the target should be named `magma-rocmNn`.
|
||||
|
||||
Make sure to edit the appropriate environment variables (e.g., DESIRED_ROCM) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct.
|
||||
@ -1,42 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -eou pipefail
|
||||
|
||||
# Environment variables
|
||||
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
|
||||
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
|
||||
|
||||
# Version 2.7.2 + ROCm related updates
|
||||
MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
|
||||
|
||||
# Folders for the build
|
||||
PACKAGE_FILES=${ROOT_DIR}/magma-rocm/package_files # metadata
|
||||
PACKAGE_DIR=${ROOT_DIR}/magma-rocm/${PACKAGE_NAME} # build workspace
|
||||
PACKAGE_OUTPUT=${ROOT_DIR}/magma-rocm/output # where tarballs are stored
|
||||
PACKAGE_BUILD=${PACKAGE_DIR} # where the content of the tarball is prepared
|
||||
PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe
|
||||
PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses
|
||||
mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE}
|
||||
|
||||
# Fetch magma sources and verify checksum
|
||||
pushd ${PACKAGE_DIR}
|
||||
git clone https://bitbucket.org/icl/magma.git
|
||||
pushd magma
|
||||
git checkout ${MAGMA_VERSION}
|
||||
popd
|
||||
popd
|
||||
|
||||
# build
|
||||
pushd ${PACKAGE_DIR}/magma
|
||||
# The build.sh script expects to be executed from the sources root folder
|
||||
INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh
|
||||
popd
|
||||
|
||||
# Package recipe, license and tarball
|
||||
# Folder and package name are backward compatible for the build workflow
|
||||
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
|
||||
cp ${PACKAGE_DIR}/magma/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT
|
||||
pushd ${PACKAGE_BUILD}
|
||||
tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info
|
||||
echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2
|
||||
popd
|
||||
@ -1,38 +0,0 @@
|
||||
# Magma build scripts need `python`
|
||||
ln -sf /usr/bin/python3 /usr/bin/python
|
||||
|
||||
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
|
||||
case "$ID" in
|
||||
almalinux)
|
||||
yum install -y gcc-gfortran
|
||||
;;
|
||||
*)
|
||||
echo "No preinstalls to build magma..."
|
||||
;;
|
||||
esac
|
||||
|
||||
MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}
|
||||
|
||||
cp make.inc-examples/make.inc.hip-gcc-mkl make.inc
|
||||
echo 'LIBDIR += -L$(MKLROOT)/lib' >> make.inc
|
||||
if [[ -f "${MKLROOT}/lib/libmkl_core.a" ]]; then
|
||||
echo 'LIB = -Wl,--start-group -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core -Wl,--end-group -lpthread -lstdc++ -lm -lgomp -lhipblas -lhipsparse' >> make.inc
|
||||
fi
|
||||
echo 'LIB += -Wl,--enable-new-dtags -Wl,--rpath,/opt/rocm/lib -Wl,--rpath,$(MKLROOT)/lib -Wl,--rpath,/opt/rocm/magma/lib -ldl' >> make.inc
|
||||
echo 'DEVCCFLAGS += --gpu-max-threads-per-block=256' >> make.inc
|
||||
export PATH="${PATH}:/opt/rocm/bin"
|
||||
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
||||
amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
|
||||
else
|
||||
amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
|
||||
fi
|
||||
for arch in $amdgpu_targets; do
|
||||
echo "DEVCCFLAGS += --offload-arch=$arch" >> make.inc
|
||||
done
|
||||
# hipcc with openmp flag may cause isnan() on __device__ not to be found; depending on context, compiler may attempt to match with host definition
|
||||
sed -i 's/^FOPENMP/#FOPENMP/g' make.inc
|
||||
make -f make.gen.hipMAGMA -j $(nproc)
|
||||
LANG=C.UTF-8 make lib/libmagma.so -j $(nproc) MKLROOT="${MKLROOT}"
|
||||
make testing/testing_dgemm -j $(nproc) MKLROOT="${MKLROOT}"
|
||||
cp -R lib ${INSTALL_DIR}
|
||||
cp -R include ${INSTALL_DIR}
|
||||
@ -12,13 +12,13 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
||||
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \
|
||||
-e DESIRED_CUDA=${DESIRED_CUDA} \
|
||||
-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \
|
||||
"pytorch/manylinux2_28-builder:cuda${DESIRED_CUDA}-main" \
|
||||
"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \
|
||||
magma/build_magma.sh
|
||||
|
||||
.PHONY: all
|
||||
all: magma-cuda128
|
||||
all: magma-cuda126
|
||||
all: magma-cuda124
|
||||
all: magma-cuda121
|
||||
all: magma-cuda118
|
||||
|
||||
.PHONY:
|
||||
@ -26,12 +26,6 @@ clean:
|
||||
$(RM) -r magma-*
|
||||
$(RM) -r output
|
||||
|
||||
.PHONY: magma-cuda128
|
||||
magma-cuda128: DESIRED_CUDA := 12.8
|
||||
magma-cuda128: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120
|
||||
magma-cuda128:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-cuda126
|
||||
magma-cuda126: DESIRED_CUDA := 12.6
|
||||
magma-cuda126:
|
||||
@ -42,6 +36,11 @@ magma-cuda124: DESIRED_CUDA := 12.4
|
||||
magma-cuda124:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-cuda121
|
||||
magma-cuda121: DESIRED_CUDA := 12.1
|
||||
magma-cuda121:
|
||||
$(DOCKER_RUN)
|
||||
|
||||
.PHONY: magma-cuda118
|
||||
magma-cuda118: DESIRED_CUDA := 11.8
|
||||
magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37
|
||||
|
||||
@ -15,12 +15,9 @@ case "${GPU_ARCH_TYPE:-BLANK}" in
|
||||
rocm)
|
||||
bash "${SCRIPTPATH}/build_rocm.sh"
|
||||
;;
|
||||
cpu | cpu-cxx11-abi | cpu-s390x)
|
||||
cpu | cpu-cxx11-abi | cpu-s390x | xpu)
|
||||
bash "${SCRIPTPATH}/build_cpu.sh"
|
||||
;;
|
||||
xpu)
|
||||
bash "${SCRIPTPATH}/build_xpu.sh"
|
||||
;;
|
||||
*)
|
||||
echo "Un-recognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
|
||||
exit 1
|
||||
|
||||
@ -18,14 +18,12 @@ retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
PLATFORM="manylinux2014_x86_64"
|
||||
# TODO move this into the Docker images
|
||||
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
retry yum install -q -y zip openssl
|
||||
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
retry yum install -q -y zip openssl
|
||||
PLATFORM="manylinux_2_28_x86_64"
|
||||
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
|
||||
retry dnf install -q -y zip openssl
|
||||
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
@ -111,6 +109,12 @@ case ${DESIRED_PYTHON} in
|
||||
;;
|
||||
esac
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
export _GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
export _GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
echo "Calling build_amd.py at $(date)"
|
||||
python tools/amd_build/build_amd.py
|
||||
@ -203,6 +207,12 @@ if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||
|
||||
mkdir -p /tmp/$LIBTORCH_HOUSE_DIR
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
LIBTORCH_ABI="cxx11-abi-"
|
||||
else
|
||||
LIBTORCH_ABI=
|
||||
fi
|
||||
|
||||
zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
|
||||
cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
|
||||
/tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
|
||||
@ -243,11 +253,11 @@ make_wheel_record() {
|
||||
FPATH=$1
|
||||
if echo $FPATH | grep RECORD >/dev/null 2>&1; then
|
||||
# if the RECORD file, then
|
||||
echo "\"$FPATH\",,"
|
||||
echo "$FPATH,,"
|
||||
else
|
||||
HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
|
||||
FSIZE=$(ls -nl $FPATH | awk '{print $5}')
|
||||
echo "\"$FPATH\",sha256=$HASH,$FSIZE"
|
||||
echo "$FPATH,sha256=$HASH,$FSIZE"
|
||||
fi
|
||||
}
|
||||
|
||||
@ -367,12 +377,6 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
|
||||
$PATCHELF_BIN --print-rpath $sofile
|
||||
done
|
||||
|
||||
# create Manylinux 2_28 tag this needs to happen before regenerate the RECORD
|
||||
if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then
|
||||
wheel_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/WHEEL/g')
|
||||
sed -i -e s#linux_x86_64#"${PLATFORM}"# $wheel_file;
|
||||
fi
|
||||
|
||||
# regenerate the RECORD file with new hashes
|
||||
record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
|
||||
if [[ -e $record_file ]]; then
|
||||
@ -412,20 +416,12 @@ for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.w
|
||||
popd
|
||||
fi
|
||||
|
||||
# Rename wheel for Manylinux 2_28
|
||||
if [[ $PLATFORM == "manylinux_2_28_x86_64" && $GPU_ARCH_TYPE != "cpu-s390x" && $GPU_ARCH_TYPE != "xpu" ]]; then
|
||||
pkg_name=$(echo $(basename $pkg) | sed -e s#linux_x86_64#"${PLATFORM}"#)
|
||||
zip -rq $pkg_name $PREIX*
|
||||
rm -f $pkg
|
||||
mv $pkg_name $(dirname $pkg)/$pkg_name
|
||||
else
|
||||
# zip up the wheel back
|
||||
zip -rq $(basename $pkg) $PREIX*
|
||||
# remove original wheel
|
||||
rm -f $pkg
|
||||
mv $(basename $pkg) $pkg
|
||||
fi
|
||||
# zip up the wheel back
|
||||
zip -rq $(basename $pkg) $PREIX*
|
||||
|
||||
# replace original wheel
|
||||
rm -f $pkg
|
||||
mv $(basename $pkg) $pkg
|
||||
cd ..
|
||||
rm -rf tmp
|
||||
done
|
||||
@ -478,9 +474,9 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
echo "$(date) :: Running tests"
|
||||
pushd "$PYTORCH_ROOT"
|
||||
|
||||
|
||||
#TODO: run_tests.sh and check_binary.sh should be moved to pytorch/pytorch project
|
||||
LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
|
||||
"${PYTORCH_ROOT}/.ci/pytorch/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
"/builder/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
|
||||
popd
|
||||
echo "$(date) :: Finished tests"
|
||||
fi
|
||||
|
||||
@ -2,6 +2,8 @@
|
||||
|
||||
set -ex
|
||||
|
||||
GPU_ARCH_TYPE=${GPU_ARCH_TYPE:-cpu}
|
||||
|
||||
export TH_BINARY_BUILD=1
|
||||
export USE_CUDA=0
|
||||
|
||||
@ -15,13 +17,22 @@ if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=()
|
||||
fi
|
||||
|
||||
WHEELHOUSE_DIR="wheelhousecpu"
|
||||
LIBTORCH_HOUSE_DIR="libtorch_housecpu"
|
||||
DIR_SUFFIX=cpu
|
||||
if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
|
||||
DIR_SUFFIX=xpu
|
||||
# Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
|
||||
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
||||
source /opt/intel/oneapi/pti/latest/env/vars.sh
|
||||
export USE_STATIC_MKL=1
|
||||
fi
|
||||
|
||||
WHEELHOUSE_DIR="wheelhouse$DIR_SUFFIX"
|
||||
LIBTORCH_HOUSE_DIR="libtorch_house$DIR_SUFFIX"
|
||||
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousecpu"
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$DIR_SUFFIX"
|
||||
else
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housecpu"
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$DIR_SUFFIX"
|
||||
fi
|
||||
fi
|
||||
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
|
||||
@ -49,6 +60,24 @@ DEPS_SONAME=(
|
||||
"libgomp.so.1"
|
||||
)
|
||||
|
||||
if [[ "$GPU_ARCH_TYPE" == "xpu" ]]; then
|
||||
echo "Bundling with xpu support package libs."
|
||||
DEPS_LIST+=(
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsvml.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libirng.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libimf.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libOpenCL.so.1"
|
||||
"libsvml.so"
|
||||
"libirng.so"
|
||||
"libimf.so"
|
||||
"libintlc.so.5"
|
||||
)
|
||||
fi
|
||||
|
||||
rm -rf /usr/local/cuda*
|
||||
|
||||
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||
|
||||
@ -14,7 +14,6 @@ export USE_CUDA_STATIC_LINK=1
|
||||
export INSTALL_TEST=0 # dont install test binaries into site-packages
|
||||
export USE_CUPTI_SO=0
|
||||
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build
|
||||
export USE_CUFILE=${USE_CUFILE:-1}
|
||||
|
||||
# Keep an array of cmake variables to add to
|
||||
if [[ -z "$CMAKE_ARGS" ]]; then
|
||||
@ -44,6 +43,13 @@ if [[ -n "$DESIRED_CUDA" ]]; then
|
||||
fi
|
||||
fi
|
||||
echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
|
||||
|
||||
# There really has to be a better way to do this - eli
|
||||
# Possibly limiting builds to specific cuda versions be delimiting images would be a choice
|
||||
if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
echo "Switching to CUDA version ${DESIRED_CUDA}"
|
||||
/builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
|
||||
fi
|
||||
else
|
||||
CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
|
||||
echo "CUDA $CUDA_VERSION Detected"
|
||||
@ -53,15 +59,15 @@ cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')
|
||||
|
||||
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
|
||||
case ${CUDA_VERSION} in
|
||||
12.8)
|
||||
TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" #removing sm_50-sm_70 as these architectures are deprecated in CUDA 12.8 and will be removed in future releases
|
||||
12.4 | 12.6)
|
||||
if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
|
||||
TORCH_CUDA_ARCH_LIST="9.0"
|
||||
else
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
|
||||
fi
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
12.6)
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
12.4)
|
||||
12.1)
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
@ -69,6 +75,10 @@ case ${CUDA_VERSION} in
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
11.[67])
|
||||
TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7"
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
|
||||
;;
|
||||
*)
|
||||
echo "unknown cuda version $CUDA_VERSION"
|
||||
exit 1
|
||||
@ -119,16 +129,7 @@ if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then
|
||||
)
|
||||
fi
|
||||
|
||||
|
||||
# Turn USE_CUFILE off for CUDA 11.8, 12.4 since nvidia-cufile-cu11 and 1.9.0.20 are
|
||||
# not available in PYPI
|
||||
if [[ $CUDA_VERSION == "11.8" || $CUDA_VERSION == "12.4" ]]; then
|
||||
export USE_CUFILE=0
|
||||
fi
|
||||
|
||||
|
||||
# CUDA_VERSION 12.4, 12.6, 12.8
|
||||
if [[ $CUDA_VERSION == 12* ]]; then
|
||||
if [[ $CUDA_VERSION == "12.4" || $CUDA_VERSION == "12.6" ]]; then
|
||||
export USE_STATIC_CUDNN=0
|
||||
# Try parallelizing nvcc as well
|
||||
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
|
||||
@ -169,16 +170,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
"libnvrtc.so.12"
|
||||
"libnvrtc-builtins.so"
|
||||
)
|
||||
if [[ $USE_CUFILE == 1 ]]; then
|
||||
DEPS_LIST+=(
|
||||
"/usr/local/cuda/lib64/libcufile.so.0"
|
||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libcufile.so.0"
|
||||
"libcufile_rdma.so.1"
|
||||
)
|
||||
fi
|
||||
else
|
||||
echo "Using nvidia libs from pypi."
|
||||
CUDA_RPATHS=(
|
||||
@ -195,11 +186,6 @@ if [[ $CUDA_VERSION == 12* ]]; then
|
||||
'$ORIGIN/../../nvidia/nccl/lib'
|
||||
'$ORIGIN/../../nvidia/nvtx/lib'
|
||||
)
|
||||
if [[ $USE_CUFILE == 1 ]]; then
|
||||
CUDA_RPATHS+=(
|
||||
'$ORIGIN/../../nvidia/cufile/lib'
|
||||
)
|
||||
fi
|
||||
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
|
||||
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
|
||||
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
|
||||
@ -285,7 +271,7 @@ else
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# run_tests.sh requires DESIRED_CUDA to know what tests to exclude
|
||||
# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
|
||||
export DESIRED_CUDA="$cuda_version_nodot"
|
||||
|
||||
# Switch `/usr/local/cuda` to the desired CUDA version
|
||||
|
||||
@ -95,6 +95,12 @@ python setup.py clean
|
||||
retry pip install -qr requirements.txt
|
||||
retry pip install -q numpy==2.0.1
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
export _GLIBCXX_USE_CXX11_ABI=1
|
||||
else
|
||||
export _GLIBCXX_USE_CXX11_ABI=0
|
||||
fi
|
||||
|
||||
if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
|
||||
echo "Calling build_amd.py at $(date)"
|
||||
python tools/amd_build/build_amd.py
|
||||
@ -163,6 +169,12 @@ fi
|
||||
|
||||
)
|
||||
|
||||
if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
|
||||
LIBTORCH_ABI="cxx11-abi-"
|
||||
else
|
||||
LIBTORCH_ABI=
|
||||
fi
|
||||
|
||||
(
|
||||
set -x
|
||||
|
||||
@ -213,11 +225,11 @@ make_wheel_record() {
|
||||
FPATH=$1
|
||||
if echo $FPATH | grep RECORD >/dev/null 2>&1; then
|
||||
# if the RECORD file, then
|
||||
echo "\"$FPATH\",,"
|
||||
echo "$FPATH,,"
|
||||
else
|
||||
HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
|
||||
FSIZE=$(ls -nl $FPATH | awk '{print $5}')
|
||||
echo "\"$FPATH\",sha256=$HASH,$FSIZE"
|
||||
echo "$FPATH,sha256=$HASH,$FSIZE"
|
||||
fi
|
||||
}
|
||||
|
||||
|
||||
@ -107,29 +107,17 @@ if [[ $ROCM_INT -ge 60200 ]]; then
|
||||
fi
|
||||
|
||||
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
|
||||
LIBELF_PATH="/usr/lib64/libelf.so.1"
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
|
||||
else
|
||||
LIBTINFO_PATH="/usr/lib64/libtinfo.so.6"
|
||||
fi
|
||||
LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
|
||||
LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
|
||||
LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
|
||||
if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
|
||||
if [[ $ROCM_INT -ge 60100 ]]; then
|
||||
# Below libs are direct dependencies of libhipsolver
|
||||
LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
|
||||
# Below libs are direct dependencies of libsatlas
|
||||
LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
|
||||
else
|
||||
LIBCHOLMOD_PATH="/lib64/libcholmod.so.3"
|
||||
# Below libs are direct dependencies of libsatlas
|
||||
LIBGFORTRAN_PATH="/lib64/libgfortran.so.5"
|
||||
fi
|
||||
LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
|
||||
# Below libs are direct dependencies of libcholmod
|
||||
LIBAMD_PATH="/lib64/libamd.so.2"
|
||||
LIBCAMD_PATH="/lib64/libcamd.so.2"
|
||||
@ -137,6 +125,7 @@ if [[ "$OS_NAME" == *"CentOS Linux"* || "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
LIBCOLAMD_PATH="/lib64/libcolamd.so.2"
|
||||
LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3"
|
||||
# Below libs are direct dependencies of libsatlas
|
||||
LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
|
||||
LIBQUADMATH_PATH="/lib64/libquadmath.so.0"
|
||||
fi
|
||||
MAYBE_LIB64=lib64
|
||||
@ -151,7 +140,7 @@ elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
fi
|
||||
LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
|
||||
LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
|
||||
if [[ $ROCM_INT -ge 60100 && $ROCM_INT -lt 60300 ]]; then
|
||||
if [[ $ROCM_INT -ge 60100 ]]; then
|
||||
# Below libs are direct dependencies of libhipsolver
|
||||
LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
|
||||
# Below libs are direct dependencies of libcholmod
|
||||
@ -186,6 +175,12 @@ do
|
||||
OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
|
||||
done
|
||||
|
||||
# PyTorch-version specific
|
||||
# AOTriton dependency only for PyTorch >= 2.4
|
||||
if (( $(echo "${PYTORCH_VERSION} 2.4" | awk '{print ($1 >= $2)}') )); then
|
||||
ROCM_SO_FILES+=("libaotriton_v2.so")
|
||||
fi
|
||||
|
||||
# rocBLAS library files
|
||||
ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
|
||||
ROCBLAS_LIB_DST=lib/rocblas/library
|
||||
|
||||
@ -1,108 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -ex
|
||||
|
||||
export TH_BINARY_BUILD=1
|
||||
export USE_CUDA=0
|
||||
|
||||
# Keep an array of cmake variables to add to
|
||||
if [[ -z "$CMAKE_ARGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build()
|
||||
CMAKE_ARGS=()
|
||||
fi
|
||||
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
|
||||
# These are passed to tools/build_pytorch_libs.sh::build_caffe2()
|
||||
EXTRA_CAFFE2_CMAKE_FLAGS=()
|
||||
fi
|
||||
|
||||
|
||||
# Refer https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
|
||||
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
||||
source /opt/intel/oneapi/pti/latest/env/vars.sh
|
||||
source /opt/intel/oneapi/umf/latest/env/vars.sh
|
||||
export USE_STATIC_MKL=1
|
||||
|
||||
WHEELHOUSE_DIR="wheelhousexpu"
|
||||
LIBTORCH_HOUSE_DIR="libtorch_housexpu"
|
||||
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousexpu"
|
||||
else
|
||||
PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housexpu"
|
||||
fi
|
||||
fi
|
||||
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
|
||||
|
||||
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
|
||||
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
|
||||
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
|
||||
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
|
||||
if [[ "$(uname -m)" == "s390x" ]]; then
|
||||
LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
|
||||
else
|
||||
LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
|
||||
fi
|
||||
fi
|
||||
|
||||
DEPS_LIST=(
|
||||
"$LIBGOMP_PATH"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1"
|
||||
)
|
||||
|
||||
DEPS_SONAME=(
|
||||
"libgomp.so.1"
|
||||
"libOpenCL.so.1"
|
||||
)
|
||||
|
||||
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
|
||||
echo "Bundling with xpu support package libs."
|
||||
DEPS_LIST+=(
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsycl.so.8"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_loader.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_adapter_level_zero.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_adapter_opencl.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsvml.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libirng.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libimf.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5"
|
||||
"/opt/intel/oneapi/pti/latest/lib/libpti_view.so.0.10"
|
||||
"/opt/intel/oneapi/umf/latest/lib/libumf.so.0"
|
||||
"/opt/intel/oneapi/tcm/latest/lib/libhwloc.so.15"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libsycl.so.8"
|
||||
"libur_loader.so.0"
|
||||
"libur_adapter_level_zero.so.0"
|
||||
"libur_adapter_opencl.so.0"
|
||||
"libsvml.so"
|
||||
"libirng.so"
|
||||
"libimf.so"
|
||||
"libintlc.so.5"
|
||||
"libpti_view.so.0.10"
|
||||
"libumf.so.0"
|
||||
"libhwloc.so.15"
|
||||
)
|
||||
else
|
||||
echo "Using xpu runtime libs from pypi."
|
||||
XPU_RPATHS=(
|
||||
'$ORIGIN/../../../..'
|
||||
)
|
||||
XPU_RPATHS=$(IFS=: ; echo "${XPU_RPATHS[*]}")
|
||||
export C_SO_RPATH=$XPU_RPATHS':$ORIGIN:$ORIGIN/lib'
|
||||
export LIB_SO_RPATH=$XPU_RPATHS':$ORIGIN'
|
||||
export FORCE_RPATH="--force-rpath"
|
||||
fi
|
||||
|
||||
rm -rf /usr/local/cuda*
|
||||
|
||||
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
BUILD_SCRIPT=build_common.sh
|
||||
else
|
||||
BUILD_SCRIPT=build_libtorch.sh
|
||||
fi
|
||||
source ${SOURCE_DIR}/${BUILD_SCRIPT}
|
||||
@ -1,6 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
# Required environment variable: $BUILD_ENVIRONMENT
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
@ -35,7 +35,7 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda11* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *clang* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *cuda11.3* && "$BUILD_ENVIRONMENT" != *clang* ]]; then
|
||||
# TODO: there is a linking issue when building with UCC using clang,
|
||||
# disable it for now and to be fix later.
|
||||
# TODO: disable UCC temporarily to enable CUDA 12.1 in CI
|
||||
@ -87,7 +87,7 @@ else
|
||||
|
||||
# Workaround required for MKL library linkage
|
||||
# https://github.com/pytorch/pytorch/issues/119557
|
||||
if [[ "$ANACONDA_PYTHON_VERSION" = "3.12" || "$ANACONDA_PYTHON_VERSION" = "3.13" ]]; then
|
||||
if [ "$ANACONDA_PYTHON_VERSION" = "3.12" ]; then
|
||||
export CMAKE_LIBRARY_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/lib/"
|
||||
export CMAKE_INCLUDE_PATH="/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/include/"
|
||||
fi
|
||||
@ -173,7 +173,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
source /opt/intel/oneapi/compiler/latest/env/vars.sh
|
||||
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
|
||||
export USE_KINETO=0
|
||||
export TORCH_XPU_ARCH_LIST=pvc
|
||||
fi
|
||||
|
||||
# sccache will fail for CUDA builds if all cores are used for compiling
|
||||
@ -192,7 +191,7 @@ fi
|
||||
|
||||
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
||||
# memory to build and will OOM
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]] && [ -z "$MAX_JOBS_OVERRIDE" ]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ "$TORCH_CUDA_ARCH_LIST" == *"8.6"* || "$TORCH_CUDA_ARCH_LIST" == *"8.0"* ]]; then
|
||||
echo "WARNING: FlashAttention files require large amounts of memory to build and will OOM"
|
||||
echo "Setting MAX_JOBS=(nproc-2)/3 to reduce memory usage"
|
||||
export MAX_JOBS="$(( $(nproc --ignore=2) / 3 ))"
|
||||
@ -229,9 +228,9 @@ if [[ "$BUILD_ENVIRONMENT" == *-debug* ]]; then
|
||||
export CMAKE_BUILD_TYPE=RelWithAssert
|
||||
fi
|
||||
|
||||
# Do not change workspace permissions for ROCm and s390x CI jobs
|
||||
# Do not change workspace permissions for ROCm CI jobs
|
||||
# as it can leave workspace with bad permissions for cancelled jobs
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
|
||||
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
|
||||
cleanup_workspace() {
|
||||
@ -248,9 +247,10 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *-bazel-* ]]; then
|
||||
set -e -o pipefail
|
||||
set -e
|
||||
|
||||
get_bazel
|
||||
install_sccache_nvcc_for_bazel
|
||||
|
||||
# Leave 1 CPU free and use only up to 80% of memory to reduce the change of crashing
|
||||
# the runner
|
||||
@ -277,13 +277,16 @@ else
|
||||
# or building non-XLA tests.
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
|
||||
"$BUILD_ENVIRONMENT" != *xla* ]]; then
|
||||
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
||||
python -mpip install numpy==2.0.2
|
||||
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
|
||||
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
||||
python -mpip install --pre numpy==2.0.2
|
||||
fi
|
||||
|
||||
WERROR=1 python setup.py clean
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
python3 tools/packaging/split_wheel.py bdist_wheel
|
||||
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 python setup.py bdist_wheel
|
||||
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 python setup.py bdist_wheel --cmake
|
||||
else
|
||||
WERROR=1 python setup.py bdist_wheel
|
||||
fi
|
||||
@ -301,18 +304,6 @@ else
|
||||
fi
|
||||
pip_install_whl "$(echo dist/*.whl)"
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
echo "Checking that xpu is compiled"
|
||||
pushd dist/
|
||||
if python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'; then
|
||||
echo "XPU support is compiled in."
|
||||
else
|
||||
echo "XPU support is NOT compiled in."
|
||||
exit 1
|
||||
fi
|
||||
popd
|
||||
fi
|
||||
|
||||
# TODO: I'm not sure why, but somehow we lose verbose commands
|
||||
set -x
|
||||
|
||||
@ -388,10 +379,8 @@ else
|
||||
# This is an attempt to mitigate flaky libtorch build OOM error. By default, the build parallelization
|
||||
# is set to be the number of CPU minus 2. So, let's try a more conservative value here. A 4xlarge has
|
||||
# 16 CPUs
|
||||
if [ -z "$MAX_JOBS_OVERRIDE" ]; then
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
fi
|
||||
MAX_JOBS=$(nproc --ignore=4)
|
||||
export MAX_JOBS
|
||||
|
||||
# NB: Install outside of source directory (at the same level as the root
|
||||
# pytorch folder) so that it doesn't get cleaned away prior to docker push.
|
||||
@ -408,7 +397,7 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
|
||||
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
|
||||
python tools/stats/export_test_times.py
|
||||
fi
|
||||
# don't do this for bazel or s390x as they don't use sccache
|
||||
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||
print_sccache_stats
|
||||
fi
|
||||
|
||||
@ -1,320 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# shellcheck disable=SC2086,SC2006,SC2207,SC2076,SC2155,SC2046,SC1091,SC2143
|
||||
# TODO: Re-enable shellchecks above
|
||||
|
||||
set -eux -o pipefail
|
||||
|
||||
# This script checks the following things on binaries
|
||||
# 1. The gcc abi matches DESIRED_DEVTOOLSET
|
||||
# 2. MacOS binaries do not link against OpenBLAS
|
||||
# 3. There are no protobuf symbols of any sort anywhere (turned off, because
|
||||
# this is currently not true)
|
||||
# 4. Standard Python imports work
|
||||
# 5. MKL is available everywhere except for MacOS wheels
|
||||
# 6. XNNPACK is available everywhere except for MacOS wheels
|
||||
# 7. CUDA is setup correctly and does not hang
|
||||
# 8. Magma is available for CUDA builds
|
||||
# 9. CuDNN is available for CUDA builds
|
||||
#
|
||||
# This script needs the env variables DESIRED_PYTHON, DESIRED_CUDA,
|
||||
# DESIRED_DEVTOOLSET and PACKAGE_TYPE
|
||||
#
|
||||
# This script expects PyTorch to be installed into the active Python (the
|
||||
# Python returned by `which python`). Or, if this is testing a libtorch
|
||||
# Pythonless binary, then it expects to be in the root folder of the unzipped
|
||||
# libtorch package.
|
||||
|
||||
|
||||
if [[ -z ${DESIRED_PYTHON:-} ]]; then
|
||||
export DESIRED_PYTHON=${MATRIX_PYTHON_VERSION:-}
|
||||
fi
|
||||
if [[ -z ${DESIRED_CUDA:-} ]]; then
|
||||
export DESIRED_CUDA=${MATRIX_DESIRED_CUDA:-}
|
||||
fi
|
||||
if [[ -z ${DESIRED_DEVTOOLSET:-} ]]; then
|
||||
export DESIRED_DEVTOOLSET=${MATRIX_DESIRED_DEVTOOLSET:-}
|
||||
fi
|
||||
if [[ -z ${PACKAGE_TYPE:-} ]]; then
|
||||
export PACKAGE_TYPE=${MATRIX_PACKAGE_TYPE:-}
|
||||
fi
|
||||
|
||||
# The install root depends on both the package type and the os
|
||||
# All MacOS packages use conda, even for the wheel packages.
|
||||
if [[ "$PACKAGE_TYPE" == libtorch ]]; then
|
||||
# NOTE: Only $PWD works on both CentOS and Ubuntu
|
||||
export install_root="$PWD"
|
||||
else
|
||||
|
||||
if [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
|
||||
# For python that is maj.mint keep original version
|
||||
py_dot="$DESIRED_PYTHON"
|
||||
elif [[ $DESIRED_PYTHON =~ ([0-9].[0-9]+) ]]; then
|
||||
# Strip everything but major.minor from DESIRED_PYTHON version
|
||||
py_dot="${BASH_REMATCH[0]}"
|
||||
else
|
||||
echo "Unexpected ${DESIRED_PYTHON} format"
|
||||
exit 1
|
||||
fi
|
||||
export install_root="$(dirname $(which python))/../lib/python${py_dot}/site-packages/torch/"
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check GCC ABI
|
||||
###############################################################################
|
||||
|
||||
# NOTE: As of https://github.com/pytorch/pytorch/issues/126551 we only produce
|
||||
# wheels with cxx11-abi
|
||||
|
||||
echo "Checking that the gcc ABI is what we expect"
|
||||
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||
# We also check that there are cxx11 symbols in libtorch
|
||||
#
|
||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||
python3 "$(dirname ${BASH_SOURCE[0]})/smoke_test/check_binary_symbols.py"
|
||||
|
||||
echo "cxx11 symbols seem to be in order"
|
||||
fi # if on Darwin
|
||||
|
||||
###############################################################################
|
||||
# Check for no OpenBLAS
|
||||
# TODO Check for no Protobuf symbols (not finished)
|
||||
# Print *all* runtime dependencies
|
||||
###############################################################################
|
||||
# We have to loop through all shared libraries for this
|
||||
if [[ "$(uname)" == 'Darwin' ]]; then
|
||||
all_dylibs=($(find "$install_root" -name '*.dylib'))
|
||||
for dylib in "${all_dylibs[@]}"; do
|
||||
echo "All dependencies of $dylib are $(otool -L $dylib) with rpath $(otool -l $dylib | grep LC_RPATH -A2)"
|
||||
|
||||
# Check that OpenBlas is not linked to on Macs
|
||||
echo "Checking the OpenBLAS is not linked to"
|
||||
if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then
|
||||
echo "ERROR: Found openblas as a dependency of $dylib"
|
||||
echo "Full dependencies is: $(otool -L $dylib)"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check for protobuf symbols
|
||||
#proto_symbols="$(nm $dylib | grep protobuf)" || true
|
||||
#if [[ -n "$proto_symbols" ]]; then
|
||||
# echo "ERROR: Detected protobuf symbols in $dylib"
|
||||
# echo "Symbols are $proto_symbols"
|
||||
# exit 1
|
||||
#fi
|
||||
done
|
||||
else
|
||||
all_libs=($(find "$install_root" -name '*.so'))
|
||||
for lib in "${all_libs[@]}"; do
|
||||
echo "All dependencies of $lib are $(ldd $lib) with runpath $(objdump -p $lib | grep RUNPATH)"
|
||||
|
||||
# Check for protobuf symbols
|
||||
#proto_symbols=$(nm $lib | grep protobuf) || true
|
||||
#if [[ -n "$proto_symbols" ]]; then
|
||||
# echo "ERROR: Detected protobuf symbols in $lib"
|
||||
# echo "Symbols are $proto_symbols"
|
||||
# exit 1
|
||||
#fi
|
||||
done
|
||||
fi
|
||||
|
||||
setup_link_flags () {
|
||||
REF_LIB="-Wl,-R${install_root}/lib"
|
||||
if [[ "$(uname)" == 'Darwin' ]]; then
|
||||
REF_LIB="-Wl,-rpath ${install_root}/lib"
|
||||
fi
|
||||
ADDITIONAL_LINKER_FLAGS=""
|
||||
if [[ "$(uname)" == 'Linux' ]]; then
|
||||
ADDITIONAL_LINKER_FLAGS="-Wl,--no-as-needed"
|
||||
fi
|
||||
C10_LINK_FLAGS=""
|
||||
if [ -f "${install_root}/lib/libc10.so" ] || [ -f "${install_root}/lib/libc10.dylib" ]; then
|
||||
C10_LINK_FLAGS="-lc10"
|
||||
fi
|
||||
TORCH_CPU_LINK_FLAGS=""
|
||||
if [ -f "${install_root}/lib/libtorch_cpu.so" ] || [ -f "${install_root}/lib/libtorch_cpu.dylib" ]; then
|
||||
TORCH_CPU_LINK_FLAGS="-ltorch_cpu"
|
||||
fi
|
||||
TORCH_CUDA_LINK_FLAGS=""
|
||||
if [ -f "${install_root}/lib/libtorch_cuda.so" ] || [ -f "${install_root}/lib/libtorch_cuda.dylib" ]; then
|
||||
TORCH_CUDA_LINK_FLAGS="-ltorch_cuda"
|
||||
elif [ -f "${install_root}/lib/libtorch_cuda_cpp.so" ] && [ -f "${install_root}/lib/libtorch_cuda_cpp.so" ] || \
|
||||
[ -f "${install_root}/lib/libtorch_cuda_cu.dylib" ] && [ -f "${install_root}/lib/libtorch_cuda_cu.dylib" ]; then
|
||||
TORCH_CUDA_LINK_FLAGS="-ltorch_cuda_cpp -ltorch_cuda_cu"
|
||||
fi
|
||||
}
|
||||
|
||||
TEST_CODE_DIR="$(dirname $(realpath ${BASH_SOURCE[0]}))/test_example_code"
|
||||
build_and_run_example_cpp () {
|
||||
setup_link_flags
|
||||
g++ ${TEST_CODE_DIR}/$1.cpp -I${install_root}/include -I${install_root}/include/torch/csrc/api/include -std=gnu++17 -L${install_root}/lib ${REF_LIB} ${ADDITIONAL_LINKER_FLAGS} -ltorch $TORCH_CPU_LINK_FLAGS $TORCH_CUDA_LINK_FLAGS $C10_LINK_FLAGS -o $1
|
||||
./$1
|
||||
}
|
||||
|
||||
###############################################################################
|
||||
# Check simple Python/C++ calls
|
||||
###############################################################################
|
||||
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
# NS: Set LD_LIBRARY_PATH for CUDA builds, but perhaps it should be removed
|
||||
if [[ "$DESIRED_CUDA" == "cu"* ]]; then
|
||||
export LD_LIBRARY_PATH=/usr/local/cuda/lib64
|
||||
fi
|
||||
build_and_run_example_cpp simple-torch-test
|
||||
else
|
||||
pushd /tmp
|
||||
python -c 'import torch'
|
||||
popd
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check torch.git_version
|
||||
###############################################################################
|
||||
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
|
||||
pushd /tmp
|
||||
python -c 'import torch; assert torch.version.git_version != "Unknown"'
|
||||
python -c 'import torch; assert torch.version.git_version != None'
|
||||
popd
|
||||
fi
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Check for MKL
|
||||
###############################################################################
|
||||
|
||||
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
echo "Checking that MKL is available"
|
||||
build_and_run_example_cpp check-torch-mkl
|
||||
elif [[ "$(uname -m)" != "arm64" && "$(uname -m)" != "s390x" ]]; then
|
||||
if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]]; then
|
||||
if [[ "$(uname -m)" == "aarch64" ]]; then
|
||||
echo "Checking that MKLDNN is available on aarch64"
|
||||
pushd /tmp
|
||||
python -c 'import torch; exit(0 if torch.backends.mkldnn.is_available() else 1)'
|
||||
popd
|
||||
else
|
||||
echo "Checking that MKL is available"
|
||||
pushd /tmp
|
||||
python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)'
|
||||
popd
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check for XNNPACK
|
||||
###############################################################################
|
||||
|
||||
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
echo "Checking that XNNPACK is available"
|
||||
build_and_run_example_cpp check-torch-xnnpack
|
||||
else
|
||||
if [[ "$(uname)" != 'Darwin' || "$PACKAGE_TYPE" != *wheel ]] && [[ "$(uname -m)" != "s390x" ]]; then
|
||||
echo "Checking that XNNPACK is available"
|
||||
pushd /tmp
|
||||
python -c 'import torch.backends.xnnpack; exit(0 if torch.backends.xnnpack.enabled else 1)'
|
||||
popd
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check XPU configured correctly
|
||||
###############################################################################
|
||||
if [[ "$DESIRED_CUDA" == 'xpu' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
|
||||
echo "Checking that xpu is compiled"
|
||||
python -c 'import torch; exit(0 if torch.xpu._is_compiled() else 1)'
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check CUDA configured correctly
|
||||
###############################################################################
|
||||
# Skip these for Windows machines without GPUs
|
||||
if [[ "$OSTYPE" == "msys" ]]; then
|
||||
GPUS=$(wmic path win32_VideoController get name)
|
||||
if [[ ! "$GPUS" == *NVIDIA* ]]; then
|
||||
echo "Skip CUDA tests for machines without a Nvidia GPU card"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test that CUDA builds are setup correctly
|
||||
if [[ "$DESIRED_CUDA" != 'cpu' && "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'cpu-cxx11-abi' && "$DESIRED_CUDA" != *"rocm"* && "$(uname -m)" != "s390x" ]]; then
|
||||
if [[ "$PACKAGE_TYPE" == 'libtorch' ]]; then
|
||||
build_and_run_example_cpp check-torch-cuda
|
||||
else
|
||||
pushd /tmp
|
||||
echo "Checking that CUDA archs are setup correctly"
|
||||
timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
|
||||
|
||||
# These have to run after CUDA is initialized
|
||||
|
||||
echo "Checking that magma is available"
|
||||
python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
|
||||
|
||||
echo "Checking that CuDNN is available"
|
||||
python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
|
||||
|
||||
# Validates builds is free of linker regressions reported in https://github.com/pytorch/pytorch/issues/57744
|
||||
echo "Checking that exception handling works"
|
||||
python -c "import torch; from unittest import TestCase;TestCase().assertRaises(RuntimeError, lambda:torch.eye(7, 7, device='cuda:7'))"
|
||||
|
||||
echo "Checking that basic RNN works"
|
||||
python ${TEST_CODE_DIR}/rnn_smoke.py
|
||||
|
||||
echo "Checking that basic CNN works"
|
||||
python "${TEST_CODE_DIR}/cnn_smoke.py"
|
||||
|
||||
echo "Test that linalg works"
|
||||
python -c "import torch;x=torch.rand(3,3,device='cuda');print(torch.linalg.svd(torch.mm(x.t(), x)))"
|
||||
|
||||
popd
|
||||
fi # if libtorch
|
||||
fi # if cuda
|
||||
|
||||
##########################
|
||||
# Run parts of smoke tests
|
||||
##########################
|
||||
if [[ "$PACKAGE_TYPE" != 'libtorch' ]]; then
|
||||
pushd "$(dirname ${BASH_SOURCE[0]})/smoke_test"
|
||||
python -c "from smoke_test import test_linalg; test_linalg()"
|
||||
if [[ "$DESIRED_CUDA" == *cuda* ]]; then
|
||||
python -c "from smoke_test import test_linalg; test_linalg('cuda')"
|
||||
fi
|
||||
popd
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check PyTorch supports TCP_TLS gloo transport
|
||||
###############################################################################
|
||||
|
||||
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" != 'libtorch' ]]; then
|
||||
GLOO_CHECK="import torch.distributed as dist
|
||||
try:
|
||||
dist.init_process_group('gloo', rank=0, world_size=1)
|
||||
except RuntimeError as e:
|
||||
print(e)
|
||||
"
|
||||
RESULT=`GLOO_DEVICE_TRANSPORT=TCP_TLS MASTER_ADDR=localhost MASTER_PORT=63945 python -c "$GLOO_CHECK"`
|
||||
GLOO_TRANSPORT_IS_NOT_SUPPORTED='gloo transport is not supported'
|
||||
if [[ "$RESULT" =~ "$GLOO_TRANSPORT_IS_NOT_SUPPORTED" ]]; then
|
||||
echo "PyTorch doesn't support TLS_TCP transport, please build with USE_GLOO_WITH_OPENSSL=1"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
|
||||
###############################################################################
|
||||
# Check for C++ ABI compatibility to GCC-11
|
||||
###############################################################################
|
||||
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" == 'manywheel' ]]; then
|
||||
pushd /tmp
|
||||
# Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html gcc-11 is ABI16
|
||||
# Though manylinux_2.28 should have been build with gcc-14, per
|
||||
# https://github.com/pypa/manylinux?tab=readme-ov-file#manylinux_2_28-almalinux-8-based
|
||||
# On s390x gcc 14 is used because it contains fix for interaction
|
||||
# between precompiled headers and vectorization builtins.
|
||||
# This fix is not available in earlier gcc versions.
|
||||
# gcc-14 uses ABI19.
|
||||
if [[ "$(uname -m)" != "s390x" ]]; then
|
||||
python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi1016' else 1)"
|
||||
fi
|
||||
popd
|
||||
fi
|
||||
@ -3,7 +3,7 @@
|
||||
# Common setup for all Jenkins scripts
|
||||
# shellcheck source=./common_utils.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
# Required environment variables:
|
||||
# $BUILD_ENVIRONMENT (should be set by your Docker image)
|
||||
|
||||
@ -111,6 +111,26 @@ function get_bazel() {
|
||||
chmod u+x tools/bazel
|
||||
}
|
||||
|
||||
# This function is bazel specific because of the bug
|
||||
# in the bazel that requires some special paths massaging
|
||||
# as a workaround. See
|
||||
# https://github.com/bazelbuild/bazel/issues/10167
|
||||
function install_sccache_nvcc_for_bazel() {
|
||||
sudo mv /usr/local/cuda/bin/nvcc /usr/local/cuda/bin/nvcc-real
|
||||
|
||||
# Write the `/usr/local/cuda/bin/nvcc`
|
||||
cat << EOF | sudo tee /usr/local/cuda/bin/nvcc
|
||||
#!/bin/sh
|
||||
if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
|
||||
exec sccache /usr/local/cuda/bin/nvcc "\$@"
|
||||
else
|
||||
exec external/local_cuda/cuda/bin/nvcc-real "\$@"
|
||||
fi
|
||||
EOF
|
||||
|
||||
sudo chmod +x /usr/local/cuda/bin/nvcc
|
||||
}
|
||||
|
||||
function install_monkeytype {
|
||||
# Install MonkeyType
|
||||
pip_install MonkeyType
|
||||
@ -160,7 +180,7 @@ function install_torchvision() {
|
||||
}
|
||||
|
||||
function install_tlparse() {
|
||||
pip_install --user "tlparse==0.3.30"
|
||||
pip_install --user "tlparse==0.3.25"
|
||||
PATH="$(python -m site --user-base)/bin:$PATH"
|
||||
}
|
||||
|
||||
@ -169,34 +189,24 @@ function install_torchrec_and_fbgemm() {
|
||||
torchrec_commit=$(get_pinned_commit torchrec)
|
||||
local fbgemm_commit
|
||||
fbgemm_commit=$(get_pinned_commit fbgemm)
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
|
||||
fbgemm_commit=$(get_pinned_commit fbgemm_rocm)
|
||||
fi
|
||||
pip_uninstall torchrec-nightly
|
||||
pip_uninstall fbgemm-gpu-nightly
|
||||
pip_install setuptools-git-versioning scikit-build pyre-extensions
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *rocm* ]] ; then
|
||||
# install torchrec first because it installs fbgemm nightly on top of rocm fbgemm
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
|
||||
pip_uninstall fbgemm-gpu-nightly
|
||||
# TODO (huydhn): I still have no clue on why sccache doesn't work with only fbgemm_gpu here, but it
|
||||
# seems to be an sccache-related issue
|
||||
if [[ "$IS_A100_RUNNER" == "1" ]]; then
|
||||
unset CMAKE_CUDA_COMPILER_LAUNCHER
|
||||
sudo mv /opt/cache/bin /opt/cache/bin-backup
|
||||
fi
|
||||
|
||||
pip_install tabulate # needed for newer fbgemm
|
||||
pip_install patchelf # needed for rocm fbgemm
|
||||
git clone --recursive https://github.com/pytorch/fbgemm
|
||||
pushd fbgemm/fbgemm_gpu
|
||||
git checkout "${fbgemm_commit}"
|
||||
python setup.py install \
|
||||
--package_variant=rocm \
|
||||
-DHIP_ROOT_DIR="${ROCM_PATH}" \
|
||||
-DCMAKE_C_FLAGS="-DTORCH_USE_HIP_DSA" \
|
||||
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
|
||||
popd
|
||||
rm -rf fbgemm
|
||||
else
|
||||
# See https://github.com/pytorch/pytorch/issues/106971
|
||||
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
|
||||
# See https://github.com/pytorch/pytorch/issues/106971
|
||||
CUDA_PATH=/usr/local/cuda-12.1 pip_install --no-use-pep517 --user "git+https://github.com/pytorch/FBGEMM.git@${fbgemm_commit}#egg=fbgemm-gpu&subdirectory=fbgemm_gpu"
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/torchrec.git@${torchrec_commit}"
|
||||
|
||||
if [[ "$IS_A100_RUNNER" == "1" ]]; then
|
||||
export CMAKE_CUDA_COMPILER_LAUNCHER=/opt/cache/bin/sccache
|
||||
sudo mv /opt/cache/bin-backup /opt/cache/bin
|
||||
fi
|
||||
}
|
||||
|
||||
@ -218,6 +228,9 @@ function checkout_install_torchbench() {
|
||||
git clone https://github.com/pytorch/benchmark torchbench
|
||||
pushd torchbench
|
||||
git checkout "$commit"
|
||||
rm -rf torchbenchmark/models/*
|
||||
git checkout -- torchbenchmark/models/__init__.py
|
||||
git checkout -- torchbenchmark/models/hf_T5
|
||||
|
||||
if [ "$1" ]; then
|
||||
python install.py --continue_on_fail models "$@"
|
||||
@ -226,22 +239,11 @@ function checkout_install_torchbench() {
|
||||
# to install and test other models
|
||||
python install.py --continue_on_fail
|
||||
fi
|
||||
|
||||
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
|
||||
# is regressing speedup metric. This needs to be investigated further
|
||||
pip install transformers==4.38.1
|
||||
|
||||
echo "Print all dependencies after TorchBench is installed"
|
||||
python -mpip freeze
|
||||
popd
|
||||
}
|
||||
|
||||
function install_torchao() {
|
||||
local commit
|
||||
commit=$(get_pinned_commit torchao)
|
||||
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/ao.git@${commit}"
|
||||
}
|
||||
|
||||
function print_sccache_stats() {
|
||||
echo 'PyTorch Build Statistics'
|
||||
sccache --show-stats
|
||||
|
||||
@ -40,7 +40,7 @@ echo "Building PyTorch C++ API docs..."
|
||||
rm -rf cppdocs
|
||||
git clone https://github.com/pytorch/cppdocs
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
# Generate ATen files
|
||||
pushd "${pt_checkout}"
|
||||
|
||||
@ -5,7 +5,7 @@ pt_checkout="/var/lib/jenkins/workspace"
|
||||
source "$pt_checkout/.ci/pytorch/common_utils.sh"
|
||||
echo "functorch_doc_push_script.sh: Invoked with $*"
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
version=${DOCS_VERSION:-nightly}
|
||||
echo "version: $version"
|
||||
|
||||
@ -6,7 +6,7 @@
|
||||
# return the same thing, ex checks for for rocm, CUDA, and changing the path
|
||||
# where sccache is installed, and not changing /etc/environment.
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
install_binary() {
|
||||
echo "Downloading sccache binary from S3 repo"
|
||||
|
||||
@ -33,15 +33,56 @@ if which sccache > /dev/null; then
|
||||
export PATH="${tmp_dir}:$PATH"
|
||||
fi
|
||||
|
||||
print_cmake_info
|
||||
if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then
|
||||
# Needed for inductor benchmarks, as lots of HF networks make `torch.distribtued` calls
|
||||
USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel
|
||||
else
|
||||
cross_compile_arm64() {
|
||||
# Cross compilation for arm64
|
||||
# Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests
|
||||
# that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448
|
||||
USE_DISTRIBUTED=0 CMAKE_OSX_ARCHITECTURES=arm64 MACOSX_DEPLOYMENT_TARGET=11.0 USE_MKLDNN=OFF USE_QNNPACK=OFF WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
||||
}
|
||||
|
||||
compile_arm64() {
|
||||
# Compilation for arm64
|
||||
# TODO: Compile with OpenMP support (but this causes CI regressions as cross-compilation were done with OpenMP disabled)
|
||||
USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel
|
||||
}
|
||||
|
||||
compile_x86_64() {
|
||||
USE_DISTRIBUTED=0 WERROR=1 python setup.py bdist_wheel --plat-name=macosx_10_9_x86_64
|
||||
}
|
||||
|
||||
build_lite_interpreter() {
|
||||
echo "Testing libtorch (lite interpreter)."
|
||||
|
||||
CPP_BUILD="$(pwd)/../cpp_build"
|
||||
# Ensure the removal of the tmp directory
|
||||
trap 'rm -rfv ${CPP_BUILD}' EXIT
|
||||
rm -rf "${CPP_BUILD}"
|
||||
mkdir -p "${CPP_BUILD}/caffe2"
|
||||
|
||||
# It looks libtorch need to be built in "${CPP_BUILD}/caffe2 folder.
|
||||
BUILD_LIBTORCH_PY=$PWD/tools/build_libtorch.py
|
||||
pushd "${CPP_BUILD}/caffe2" || exit
|
||||
VERBOSE=1 DEBUG=1 python "${BUILD_LIBTORCH_PY}"
|
||||
popd || exit
|
||||
|
||||
"${CPP_BUILD}/caffe2/build/bin/test_lite_interpreter_runtime"
|
||||
}
|
||||
|
||||
print_cmake_info
|
||||
|
||||
if [[ ${BUILD_ENVIRONMENT} = *arm64* ]]; then
|
||||
if [[ $(uname -m) == "arm64" ]]; then
|
||||
compile_arm64
|
||||
else
|
||||
cross_compile_arm64
|
||||
fi
|
||||
elif [[ ${BUILD_ENVIRONMENT} = *lite-interpreter* ]]; then
|
||||
export BUILD_LITE_INTERPRETER=1
|
||||
build_lite_interpreter
|
||||
else
|
||||
compile_x86_64
|
||||
fi
|
||||
|
||||
if which sccache > /dev/null; then
|
||||
print_sccache_stats
|
||||
fi
|
||||
|
||||
@ -18,9 +18,6 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available(
|
||||
fi
|
||||
popd
|
||||
|
||||
# enable debug asserts in serialization
|
||||
export TORCH_SERIALIZATION_DEBUG=1
|
||||
|
||||
setup_test_python() {
|
||||
# The CircleCI worker hostname doesn't resolve to an address.
|
||||
# This environment variable makes ProcessGroupGloo default to
|
||||
@ -193,19 +190,11 @@ test_torchbench_perf() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
local backend=eager
|
||||
local dtype=notset
|
||||
local device=mps
|
||||
|
||||
echo "Setup complete, launching torchbench training performance run"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
||||
--performance --backend "$backend" --training --devices "$device" \
|
||||
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
|
||||
echo "Launching torchbench inference performance run"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
||||
--performance --backend "$backend" --inference --devices "$device" \
|
||||
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
|
||||
echo "Pytorch benchmark on mps device completed"
|
||||
}
|
||||
@ -220,41 +209,26 @@ test_torchbench_smoketest() {
|
||||
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
touch "$TEST_REPORTS_DIR"/torchbench_training.csv
|
||||
touch "$TEST_REPORTS_DIR"/torchbench_inference.csv
|
||||
|
||||
local device=mps
|
||||
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam pytorch_unet stable_diffusion_text_encoder moco speech_transformer)
|
||||
echo "Setup complete, launching torchbench training performance run"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
|
||||
|
||||
for backend in eager inductor; do
|
||||
|
||||
for dtype in notset float16 bfloat16; do
|
||||
echo "Launching torchbench inference performance run for backend ${backend} and dtype ${dtype}"
|
||||
local dtype_arg="--${dtype}"
|
||||
if [ "$dtype" == notset ]; then
|
||||
dtype_arg="--float32"
|
||||
fi
|
||||
touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv"
|
||||
for model in "${models[@]}"; do
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
||||
--performance --only "$model" --backend "$backend" --inference --devices "$device" "$dtype_arg" \
|
||||
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_inference_${device}_performance.csv" || true
|
||||
done
|
||||
done
|
||||
|
||||
for dtype in notset amp; do
|
||||
echo "Launching torchbench training performance run for backend ${backend} and dtype ${dtype}"
|
||||
touch "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv"
|
||||
local dtype_arg="--${dtype}"
|
||||
if [ "$dtype" == notset ]; then
|
||||
dtype_arg="--float32"
|
||||
fi
|
||||
for model in "${models[@]}"; do
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
||||
--performance --only "$model" --backend "$backend" --training --devices "$device" "$dtype_arg" \
|
||||
--output "$TEST_REPORTS_DIR/inductor_${backend}_torchbench_${dtype}_training_${device}_performance.csv" || true
|
||||
done
|
||||
done
|
||||
|
||||
done
|
||||
echo "Launching torchbench inference performance run"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
|
||||
|
||||
echo "Pytorch benchmark on mps device completed"
|
||||
}
|
||||
@ -293,6 +267,25 @@ test_timm_perf() {
|
||||
|
||||
install_tlparse
|
||||
|
||||
if [[ $TEST_CONFIG == *"test_mps"* ]]; then
|
||||
if [[ $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_python_shard "${SHARD_NUMBER}"
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
|
||||
test_jit_hooks
|
||||
test_custom_backend
|
||||
fi
|
||||
else
|
||||
test_python_all
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
test_jit_hooks
|
||||
test_custom_backend
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ $TEST_CONFIG == *"perf_all"* ]]; then
|
||||
test_torchbench_perf
|
||||
test_hf_perf
|
||||
@ -305,19 +298,4 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then
|
||||
test_timm_perf
|
||||
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
|
||||
test_torchbench_smoketest
|
||||
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_python_shard "${SHARD_NUMBER}"
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
|
||||
test_jit_hooks
|
||||
test_custom_backend
|
||||
fi
|
||||
else
|
||||
test_python_all
|
||||
test_libtorch
|
||||
test_custom_script_ops
|
||||
test_jit_hooks
|
||||
test_custom_backend
|
||||
fi
|
||||
|
||||
@ -8,62 +8,55 @@
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
echo "Testing pytorch"
|
||||
# When adding more tests, please use HUD to see which shard is shorter
|
||||
if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
|
||||
# FSDP tests
|
||||
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
|
||||
fi
|
||||
time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
|
||||
|
||||
if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
|
||||
time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose
|
||||
# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
|
||||
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
|
||||
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_common
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
|
||||
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
|
||||
time python test/run_test.py --verbose -i distributed/test_store
|
||||
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
|
||||
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
|
||||
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
|
||||
# FSDP tests
|
||||
for f in test/distributed/fsdp/*.py ; do time python test/run_test.py --verbose -i "${f#*/}" ; done
|
||||
# ShardedTensor tests
|
||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
|
||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
|
||||
|
||||
# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
|
||||
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
|
||||
# OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="test/cpp/api/mnist" build/bin/test_api
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_common
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_gloo
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_nccl
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_gloo
|
||||
time python test/run_test.py --verbose -i distributed/test_c10d_spawn_nccl
|
||||
time python test/run_test.py --verbose -i distributed/test_compute_comm_reordering
|
||||
time python test/run_test.py --verbose -i distributed/test_store
|
||||
time python test/run_test.py --verbose -i distributed/test_symmetric_memory
|
||||
time python test/run_test.py --verbose -i distributed/test_pg_wrapper
|
||||
time python test/run_test.py --verbose -i distributed/rpc/cuda/test_tensorpipe_agent
|
||||
# functional collective tests
|
||||
time python test/run_test.py --verbose -i distributed/test_functional_api
|
||||
|
||||
# ShardedTensor tests
|
||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_checkpoint
|
||||
time python test/run_test.py --verbose -i distributed/checkpoint/test_file_system_checkpoint
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_spec/test_sharding_spec
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharding_plan/test_sharding_plan
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor
|
||||
time python test/run_test.py --verbose -i distributed/_shard/sharded_tensor/test_sharded_tensor_reshard
|
||||
# DTensor tests
|
||||
time python test/run_test.py --verbose -i distributed/_tensor/test_random_ops
|
||||
time python test/run_test.py --verbose -i distributed/_tensor/test_dtensor_compile
|
||||
|
||||
# functional collective tests
|
||||
time python test/run_test.py --verbose -i distributed/test_functional_api
|
||||
# DeviceMesh test
|
||||
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
||||
|
||||
# DTensor tests
|
||||
time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
|
||||
time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
|
||||
# DTensor/TP tests
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
|
||||
|
||||
# DeviceMesh test
|
||||
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
||||
# FSDP2 tests
|
||||
time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
|
||||
|
||||
# DTensor/TP tests
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_examples
|
||||
time python test/run_test.py --verbose -i distributed/tensor/parallel/test_tp_random_state
|
||||
# ND composability tests
|
||||
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
|
||||
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
|
||||
|
||||
# FSDP2 tests
|
||||
time python test/run_test.py --verbose -i distributed/_composable/fsdp/test_fully_shard_training -- -k test_2d_mlp_with_nd_mesh
|
||||
|
||||
# ND composability tests
|
||||
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_2d_composability
|
||||
time python test/run_test.py --verbose -i distributed/_composable/test_composability/test_pp_composability
|
||||
|
||||
# Other tests
|
||||
time python test/run_test.py --verbose -i test_cuda_primary_ctx
|
||||
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
|
||||
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
|
||||
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
|
||||
fi
|
||||
# Other tests
|
||||
time python test/run_test.py --verbose -i test_cuda_primary_ctx
|
||||
time python test/run_test.py --verbose -i test_optim -- -k test_forloop_goes_right_direction_multigpu
|
||||
time python test/run_test.py --verbose -i test_optim -- -k test_mixed_device_dtype
|
||||
time python test/run_test.py --verbose -i test_foreach -- -k test_tensors_grouping
|
||||
assert_git_not_dirty
|
||||
|
||||
@ -7,7 +7,7 @@ source "$pt_checkout/.ci/pytorch/common_utils.sh"
|
||||
|
||||
echo "python_doc_push_script.sh: Invoked with $*"
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
# for statements like ${1:-${DOCS_INSTALL_PATH:-docs/}}
|
||||
# the order of operations goes:
|
||||
@ -63,7 +63,7 @@ build_docs () {
|
||||
echo "(tried to echo the WARNINGS above the ==== line)"
|
||||
echo =========================
|
||||
fi
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
return $code
|
||||
}
|
||||
|
||||
@ -119,6 +119,12 @@ popd
|
||||
git rm -rf "$install_path" || true
|
||||
mv "$pt_checkout/docs/build/html" "$install_path"
|
||||
|
||||
# Prevent Google from indexing $install_path/_modules. This folder contains
|
||||
# generated source files.
|
||||
# NB: the following only works on gnu sed. The sed shipped with mac os is different.
|
||||
# One can `brew install gnu-sed` on a mac and then use "gsed" instead of "sed".
|
||||
find "$install_path/_modules" -name "*.html" -print0 | xargs -0 sed -i '/<head>/a \ \ <meta name="robots" content="noindex">'
|
||||
|
||||
git add "$install_path" || true
|
||||
git status
|
||||
git config user.email "soumith+bot@pytorch.org"
|
||||
|
||||
@ -1,436 +0,0 @@
|
||||
#!/bin/bash
|
||||
# shellcheck disable=SC2086,SC2048,SC2068,SC2145,SC2034,SC2207,SC2143
|
||||
# TODO: Re-enable shellchecks above
|
||||
|
||||
set -eux -o pipefail
|
||||
|
||||
# Essentially runs pytorch/test/run_test.py, but keeps track of which tests to
|
||||
# skip in a centralized place.
|
||||
#
|
||||
# TODO Except for a few tests, this entire file is a giant TODO. Why are these
|
||||
# tests # failing?
|
||||
# TODO deal with Windows
|
||||
|
||||
# This script expects to be in the pytorch root folder
|
||||
if [[ ! -d 'test' || ! -f 'test/run_test.py' ]]; then
|
||||
echo "run_tests.sh expects to be run from the Pytorch root directory " \
|
||||
"but I'm actually in $(pwd)"
|
||||
exit 2
|
||||
fi
|
||||
|
||||
# Allow master skip of all tests
|
||||
if [[ -n "${SKIP_ALL_TESTS:-}" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# If given specific test params then just run those
|
||||
if [[ -n "${RUN_TEST_PARAMS:-}" ]]; then
|
||||
echo "$(date) :: Calling user-command $(pwd)/test/run_test.py ${RUN_TEST_PARAMS[@]}"
|
||||
python test/run_test.py ${RUN_TEST_PARAMS[@]}
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Function to retry functions that sometimes timeout or have flaky failures
|
||||
retry () {
|
||||
$* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
|
||||
}
|
||||
|
||||
# Parameters
|
||||
##############################################################################
|
||||
if [[ "$#" != 3 ]]; then
|
||||
if [[ -z "${DESIRED_PYTHON:-}" || -z "${DESIRED_CUDA:-}" || -z "${PACKAGE_TYPE:-}" ]]; then
|
||||
echo "USAGE: run_tests.sh PACKAGE_TYPE DESIRED_PYTHON DESIRED_CUDA"
|
||||
echo "The env variable PACKAGE_TYPE must be set to 'manywheel' or 'libtorch'"
|
||||
echo "The env variable DESIRED_PYTHON must be set like '2.7mu' or '3.6m' etc"
|
||||
echo "The env variable DESIRED_CUDA must be set like 'cpu' or 'cu80' etc"
|
||||
exit 1
|
||||
fi
|
||||
package_type="$PACKAGE_TYPE"
|
||||
py_ver="$DESIRED_PYTHON"
|
||||
cuda_ver="$DESIRED_CUDA"
|
||||
else
|
||||
package_type="$1"
|
||||
py_ver="$2"
|
||||
cuda_ver="$3"
|
||||
fi
|
||||
|
||||
if [[ "$cuda_ver" == 'cpu-cxx11-abi' ]]; then
|
||||
cuda_ver="cpu"
|
||||
fi
|
||||
|
||||
# cu80, cu90, cu100, cpu
|
||||
if [[ ${#cuda_ver} -eq 4 ]]; then
|
||||
cuda_ver_majmin="${cuda_ver:2:1}.${cuda_ver:3:1}"
|
||||
elif [[ ${#cuda_ver} -eq 5 ]]; then
|
||||
cuda_ver_majmin="${cuda_ver:2:2}.${cuda_ver:4:1}"
|
||||
fi
|
||||
|
||||
NUMPY_PACKAGE=""
|
||||
if [[ ${py_ver} == "3.10" ]]; then
|
||||
PROTOBUF_PACKAGE="protobuf>=3.17.2"
|
||||
NUMPY_PACKAGE="numpy>=1.21.2"
|
||||
else
|
||||
PROTOBUF_PACKAGE="protobuf=3.14.0"
|
||||
fi
|
||||
|
||||
# Environment initialization
|
||||
if [[ "$(uname)" == Darwin ]]; then
|
||||
# Install the testing dependencies
|
||||
retry conda install -yq future hypothesis ${NUMPY_PACKAGE} ${PROTOBUF_PACKAGE} pytest setuptools six typing_extensions pyyaml
|
||||
else
|
||||
retry pip install -qr requirements.txt || true
|
||||
retry pip install -q hypothesis protobuf pytest setuptools || true
|
||||
numpy_ver=1.15
|
||||
case "$(python --version 2>&1)" in
|
||||
*2* | *3.5* | *3.6*)
|
||||
numpy_ver=1.11
|
||||
;;
|
||||
esac
|
||||
retry pip install -q "numpy==${numpy_ver}" || true
|
||||
fi
|
||||
|
||||
echo "Testing with:"
|
||||
pip freeze
|
||||
conda list || true
|
||||
|
||||
##############################################################################
|
||||
# Smoke tests
|
||||
##############################################################################
|
||||
# TODO use check_binary.sh, which requires making sure it runs on Windows
|
||||
pushd /
|
||||
echo "Smoke testing imports"
|
||||
python -c 'import torch'
|
||||
|
||||
# Test that MKL is there
|
||||
if [[ "$(uname)" == 'Darwin' && "$package_type" == *wheel ]]; then
|
||||
echo 'Not checking for MKL on Darwin wheel packages'
|
||||
else
|
||||
echo "Checking that MKL is available"
|
||||
python -c 'import torch; exit(0 if torch.backends.mkl.is_available() else 1)'
|
||||
fi
|
||||
|
||||
if [[ "$OSTYPE" == "msys" ]]; then
|
||||
GPUS=$(wmic path win32_VideoController get name)
|
||||
if [[ ! "$GPUS" == *NVIDIA* ]]; then
|
||||
echo "Skip CUDA tests for machines without a Nvidia GPU card"
|
||||
exit 0
|
||||
fi
|
||||
fi
|
||||
|
||||
# Test that the version number is consistent during building and testing
|
||||
if [[ "$PYTORCH_BUILD_NUMBER" -gt 1 ]]; then
|
||||
expected_version="${PYTORCH_BUILD_VERSION}.post${PYTORCH_BUILD_NUMBER}"
|
||||
else
|
||||
expected_version="${PYTORCH_BUILD_VERSION}"
|
||||
fi
|
||||
echo "Checking that we are testing the package that is just built"
|
||||
python -c "import torch; exit(0 if torch.__version__ == '$expected_version' else 1)"
|
||||
|
||||
# Test that CUDA builds are setup correctly
|
||||
if [[ "$cuda_ver" != 'cpu' ]]; then
|
||||
cuda_installed=1
|
||||
nvidia-smi || cuda_installed=0
|
||||
if [[ "$cuda_installed" == 0 ]]; then
|
||||
echo "Skip CUDA tests for machines without a Nvidia GPU card"
|
||||
else
|
||||
# Test CUDA archs
|
||||
echo "Checking that CUDA archs are setup correctly"
|
||||
timeout 20 python -c 'import torch; torch.randn([3,5]).cuda()'
|
||||
|
||||
# These have to run after CUDA is initialized
|
||||
echo "Checking that magma is available"
|
||||
python -c 'import torch; torch.rand(1).cuda(); exit(0 if torch.cuda.has_magma else 1)'
|
||||
echo "Checking that CuDNN is available"
|
||||
python -c 'import torch; exit(0 if torch.backends.cudnn.is_available() else 1)'
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check that OpenBlas is not linked to on MacOS
|
||||
if [[ "$(uname)" == 'Darwin' ]]; then
|
||||
echo "Checking the OpenBLAS is not linked to"
|
||||
all_dylibs=($(find "$(python -c "import site; print(site.getsitepackages()[0])")"/torch -name '*.dylib'))
|
||||
for dylib in "${all_dylibs[@]}"; do
|
||||
if [[ -n "$(otool -L $dylib | grep -i openblas)" ]]; then
|
||||
echo "Found openblas as a dependency of $dylib"
|
||||
echo "Full dependencies is: $(otool -L $dylib)"
|
||||
exit 1
|
||||
fi
|
||||
done
|
||||
|
||||
echo "Checking that OpenMP is available"
|
||||
python -c "import torch; exit(0 if torch.backends.openmp.is_available() else 1)"
|
||||
fi
|
||||
|
||||
popd
|
||||
|
||||
# TODO re-enable the other tests after the nightlies are moved to CI. This is
|
||||
# because the binaries keep breaking, often from additional tests, that aren't
|
||||
# real problems. Once these are on circleci and a smoke-binary-build is added
|
||||
# to PRs then this should stop happening and these can be re-enabled.
|
||||
echo "Not running unit tests. Hopefully these problems are caught by CI"
|
||||
exit 0
|
||||
|
||||
|
||||
##############################################################################
|
||||
# Running unit tests (except not right now)
|
||||
##############################################################################
|
||||
echo "$(date) :: Starting tests for $package_type package for python$py_ver and $cuda_ver"
|
||||
|
||||
# We keep track of exact tests to skip, as otherwise we would be hardly running
|
||||
# any tests. But b/c of issues working with pytest/normal-python-test/ and b/c
|
||||
# of special snowflake tests in test/run_test.py we also take special care of
|
||||
# those
|
||||
tests_to_skip=()
|
||||
|
||||
#
|
||||
# Entire file exclusions
|
||||
##############################################################################
|
||||
entire_file_exclusions=("-x")
|
||||
|
||||
# cpp_extensions doesn't work with pytest, so we exclude it from the pytest run
|
||||
# here and then manually run it later. Note that this is only because this
|
||||
# entire_fil_exclusions flag is only passed to the pytest run
|
||||
entire_file_exclusions+=("cpp_extensions")
|
||||
|
||||
# TODO temporary line to fix next days nightlies, but should be removed when
|
||||
# issue is fixed
|
||||
entire_file_exclusions+=('type_info')
|
||||
|
||||
if [[ "$cuda_ver" == 'cpu' ]]; then
|
||||
# test/test_cuda.py exits early if the installed torch is not built with
|
||||
# CUDA, but the exit doesn't work when running with pytest, so pytest will
|
||||
# still try to run all the CUDA tests and then fail
|
||||
entire_file_exclusions+=("cuda")
|
||||
entire_file_exclusions+=("nccl")
|
||||
fi
|
||||
|
||||
if [[ "$(uname)" == 'Darwin' || "$OSTYPE" == "msys" ]]; then
|
||||
# pytest on Mac doesn't like the exits in these files
|
||||
entire_file_exclusions+=('c10d')
|
||||
entire_file_exclusions+=('distributed')
|
||||
|
||||
# pytest doesn't mind the exit but fails the tests. On Mac we run this
|
||||
# later without pytest
|
||||
entire_file_exclusions+=('thd_distributed')
|
||||
fi
|
||||
|
||||
|
||||
#
|
||||
# Universal flaky tests
|
||||
##############################################################################
|
||||
|
||||
# RendezvousEnvTest sometimes hangs forever
|
||||
# Otherwise it will fail on CUDA with
|
||||
# Traceback (most recent call last):
|
||||
# File "test_c10d.py", line 179, in test_common_errors
|
||||
# next(gen)
|
||||
# AssertionError: ValueError not raised
|
||||
tests_to_skip+=('RendezvousEnvTest and test_common_errors')
|
||||
|
||||
# This hung forever once on conda_3.5_cu92
|
||||
tests_to_skip+=('TestTorch and test_sum_dim')
|
||||
|
||||
# test_trace_warn isn't actually flaky, but it doesn't work with pytest so we
|
||||
# just skip it
|
||||
tests_to_skip+=('TestJit and test_trace_warn')
|
||||
#
|
||||
# Python specific flaky tests
|
||||
##############################################################################
|
||||
|
||||
# test_dataloader.py:721: AssertionError
|
||||
# looks like a timeout, but interestingly only appears on python 3
|
||||
if [[ "$py_ver" == 3* ]]; then
|
||||
tests_to_skip+=('TestDataLoader and test_proper_exit')
|
||||
fi
|
||||
|
||||
#
|
||||
# CUDA flaky tests, all package types
|
||||
##############################################################################
|
||||
if [[ "$cuda_ver" != 'cpu' ]]; then
|
||||
|
||||
#
|
||||
# DistributedDataParallelTest
|
||||
# All of these seem to fail
|
||||
tests_to_skip+=('DistributedDataParallelTest')
|
||||
|
||||
#
|
||||
# RendezvousEnvTest
|
||||
# Traceback (most recent call last):
|
||||
# File "test_c10d.py", line 201, in test_nominal
|
||||
# store0, rank0, size0 = next(gen0)
|
||||
# File "/opt/python/cp36-cp36m/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 131, in _env_rendezvous_handler
|
||||
# store = TCPStore(master_addr, master_port, start_daemon)
|
||||
# RuntimeError: Address already in use
|
||||
tests_to_skip+=('RendezvousEnvTest and test_nominal')
|
||||
|
||||
#
|
||||
# TestCppExtension
|
||||
#
|
||||
# Traceback (most recent call last):
|
||||
# File "test_cpp_extensions.py", line 134, in test_jit_cudnn_extension
|
||||
# with_cuda=True)
|
||||
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 552, in load
|
||||
# with_cuda)
|
||||
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 729, in _jit_compile
|
||||
# return _import_module_from_library(name, build_directory)
|
||||
# File "/opt/python/cp35-cp35m/lib/python3.5/site-packages/torch/utils/cpp_extension.py", line 867, in _import_module_from_library
|
||||
# return imp.load_module(module_name, file, path, description)
|
||||
# File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 243, in load_module
|
||||
# return load_dynamic(name, filename, file)
|
||||
# File "/opt/python/cp35-cp35m/lib/python3.5/imp.py", line 343, in load_dynamic
|
||||
# return _load(spec)
|
||||
# File "<frozen importlib._bootstrap>", line 693, in _load
|
||||
# File "<frozen importlib._bootstrap>", line 666, in _load_unlocked
|
||||
# File "<frozen importlib._bootstrap>", line 577, in module_from_spec
|
||||
# File "<frozen importlib._bootstrap_external>", line 938, in create_module
|
||||
# File "<frozen importlib._bootstrap>", line 222, in _call_with_frames_removed
|
||||
# ImportError: libcudnn.so.7: cannot open shared object file: No such file or directory
|
||||
tests_to_skip+=('TestCppExtension and test_jit_cudnn_extension')
|
||||
|
||||
#
|
||||
# TestCuda
|
||||
#
|
||||
|
||||
# 3.7_cu80
|
||||
# RuntimeError: CUDA error: out of memory
|
||||
tests_to_skip+=('TestCuda and test_arithmetic_large_tensor')
|
||||
|
||||
# 3.7_cu80
|
||||
# RuntimeError: cuda runtime error (2) : out of memory at /opt/conda/conda-bld/pytorch-nightly_1538097262541/work/aten/src/THC/THCTensorCopy.cu:205
|
||||
tests_to_skip+=('TestCuda and test_autogpu')
|
||||
|
||||
#
|
||||
# TestDistBackend
|
||||
#
|
||||
|
||||
# Traceback (most recent call last):
|
||||
# File "test_thd_distributed.py", line 1046, in wrapper
|
||||
# self._join_and_reduce(fn)
|
||||
# File "test_thd_distributed.py", line 1108, in _join_and_reduce
|
||||
# self.assertEqual(p.exitcode, first_process.exitcode)
|
||||
# File "/pytorch/test/common.py", line 399, in assertEqual
|
||||
# super(TestCase, self).assertEqual(x, y, message)
|
||||
# AssertionError: None != 77 :
|
||||
tests_to_skip+=('TestDistBackend and test_all_gather_group')
|
||||
tests_to_skip+=('TestDistBackend and test_all_reduce_group_max')
|
||||
tests_to_skip+=('TestDistBackend and test_all_reduce_group_min')
|
||||
tests_to_skip+=('TestDistBackend and test_all_reduce_group_sum')
|
||||
tests_to_skip+=('TestDistBackend and test_all_reduce_group_product')
|
||||
tests_to_skip+=('TestDistBackend and test_barrier_group')
|
||||
tests_to_skip+=('TestDistBackend and test_broadcast_group')
|
||||
|
||||
# Traceback (most recent call last):
|
||||
# File "test_thd_distributed.py", line 1046, in wrapper
|
||||
# self._join_and_reduce(fn)
|
||||
# File "test_thd_distributed.py", line 1108, in _join_and_reduce
|
||||
# self.assertEqual(p.exitcode, first_process.exitcode)
|
||||
# File "/pytorch/test/common.py", line 397, in assertEqual
|
||||
# super(TestCase, self).assertLessEqual(abs(x - y), prec, message)
|
||||
# AssertionError: 12 not less than or equal to 1e-05
|
||||
tests_to_skip+=('TestDistBackend and test_barrier')
|
||||
|
||||
# Traceback (most recent call last):
|
||||
# File "test_distributed.py", line 1267, in wrapper
|
||||
# self._join_and_reduce(fn)
|
||||
# File "test_distributed.py", line 1350, in _join_and_reduce
|
||||
# self.assertEqual(p.exitcode, first_process.exitcode)
|
||||
# File "/pytorch/test/common.py", line 399, in assertEqual
|
||||
# super(TestCase, self).assertEqual(x, y, message)
|
||||
# AssertionError: None != 1
|
||||
tests_to_skip+=('TestDistBackend and test_broadcast')
|
||||
|
||||
# Memory leak very similar to all the conda ones below, but appears on manywheel
|
||||
# 3.6m_cu80
|
||||
# AssertionError: 1605632 not less than or equal to 1e-05 : __main__.TestEndToEndHybridFrontendModels.test_vae_cuda leaked 1605632 bytes CUDA memory on device 0
|
||||
tests_to_skip+=('TestEndToEndHybridFrontendModels and test_vae_cuda')
|
||||
|
||||
# ________________________ TestNN.test_embedding_bag_cuda ________________________
|
||||
#
|
||||
# self = <test_nn.TestNN testMethod=test_embedding_bag_cuda>
|
||||
# dtype = torch.float32
|
||||
#
|
||||
# @unittest.skipIf(not TEST_CUDA, "CUDA unavailable")
|
||||
# @repeat_test_for_types(ALL_TENSORTYPES)
|
||||
# @skipIfRocm
|
||||
# def test_embedding_bag_cuda(self, dtype=torch.float):
|
||||
# self._test_EmbeddingBag(True, 'sum', False, dtype)
|
||||
# self._test_EmbeddingBag(True, 'mean', False, dtype)
|
||||
# self._test_EmbeddingBag(True, 'max', False, dtype)
|
||||
# if dtype != torch.half:
|
||||
# # torch.cuda.sparse.HalfTensor is not enabled.
|
||||
# self._test_EmbeddingBag(True, 'sum', True, dtype)
|
||||
# > self._test_EmbeddingBag(True, 'mean', True, dtype)
|
||||
#
|
||||
# test_nn.py:2144:
|
||||
# _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
|
||||
# test_nn.py:2062: in _test_EmbeddingBag
|
||||
# _test_vs_Embedding(N, D, B, L)
|
||||
# test_nn.py:2059: in _test_vs_Embedding
|
||||
# self.assertEqual(es_weight_grad, e.weight.grad, needed_prec)
|
||||
# common.py:373: in assertEqual
|
||||
# assertTensorsEqual(x, y)
|
||||
# common.py:365: in assertTensorsEqual
|
||||
# self.assertLessEqual(max_err, prec, message)
|
||||
# E AssertionError: tensor(0.0000, device='cuda:0', dtype=torch.float32) not less than or equal to 2e-05 :
|
||||
# 1 failed, 1202 passed, 19 skipped, 2 xfailed, 796 warnings in 1166.73 seconds =
|
||||
# Traceback (most recent call last):
|
||||
# File "test/run_test.py", line 391, in <module>
|
||||
# main()
|
||||
# File "test/run_test.py", line 383, in main
|
||||
# raise RuntimeError(message)
|
||||
tests_to_skip+=('TestNN and test_embedding_bag_cuda')
|
||||
fi
|
||||
|
||||
##############################################################################
|
||||
# MacOS specific flaky tests
|
||||
##############################################################################
|
||||
|
||||
if [[ "$(uname)" == 'Darwin' ]]; then
|
||||
# TestCppExtensions by default uses a temp folder in /tmp. This doesn't
|
||||
# work for this Mac machine cause there is only one machine and /tmp is
|
||||
# shared. (All the linux builds are on docker so have their own /tmp).
|
||||
tests_to_skip+=('TestCppExtension')
|
||||
fi
|
||||
|
||||
# Turn the set of tests to skip into an invocation that pytest understands
|
||||
excluded_tests_logic=''
|
||||
for exclusion in "${tests_to_skip[@]}"; do
|
||||
if [[ -z "$excluded_tests_logic" ]]; then
|
||||
# Only true for i==0
|
||||
excluded_tests_logic="not ($exclusion)"
|
||||
else
|
||||
excluded_tests_logic="$excluded_tests_logic and not ($exclusion)"
|
||||
fi
|
||||
done
|
||||
|
||||
|
||||
##############################################################################
|
||||
# Run the tests
|
||||
##############################################################################
|
||||
echo
|
||||
echo "$(date) :: Calling 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
|
||||
|
||||
python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k "'" "$excluded_tests_logic" "'"
|
||||
|
||||
echo
|
||||
echo "$(date) :: Finished 'python test/run_test.py -v -p pytest ${entire_file_exclusions[@]} -- --disable-pytest-warnings -k '$excluded_tests_logic'"
|
||||
|
||||
# cpp_extensions don't work with pytest, so we run them without pytest here,
|
||||
# except there's a failure on CUDA builds (documented above), and
|
||||
# cpp_extensions doesn't work on a shared mac machine (also documented above)
|
||||
if [[ "$cuda_ver" == 'cpu' && "$(uname)" != 'Darwin' ]]; then
|
||||
echo
|
||||
echo "$(date) :: Calling 'python test/run_test.py -v -i cpp_extensions'"
|
||||
python test/run_test.py -v -i cpp_extensions
|
||||
echo
|
||||
echo "$(date) :: Finished 'python test/run_test.py -v -i cpp_extensions'"
|
||||
fi
|
||||
|
||||
# thd_distributed can run on Mac but not in pytest
|
||||
if [[ "$(uname)" == 'Darwin' ]]; then
|
||||
echo
|
||||
echo "$(date) :: Calling 'python test/run_test.py -v -i thd_distributed'"
|
||||
python test/run_test.py -v -i thd_distributed
|
||||
echo
|
||||
echo "$(date) :: Finished 'python test/run_test.py -v -i thd_distributed'"
|
||||
fi
|
||||
@ -1,113 +0,0 @@
|
||||
#!/usr/bin/env python3
|
||||
import concurrent.futures
|
||||
import distutils.sysconfig
|
||||
import functools
|
||||
import itertools
|
||||
import os
|
||||
import re
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
|
||||
# We also check that there are [not] cxx11 symbols in libtorch
|
||||
#
|
||||
# To check whether it is using cxx11 ABI, check non-existence of symbol:
|
||||
PRE_CXX11_SYMBOLS = (
|
||||
"std::basic_string<",
|
||||
"std::list",
|
||||
)
|
||||
# To check whether it is using pre-cxx11 ABI, check non-existence of symbol:
|
||||
CXX11_SYMBOLS = (
|
||||
"std::__cxx11::basic_string",
|
||||
"std::__cxx11::list",
|
||||
)
|
||||
# NOTE: Checking the above symbols in all namespaces doesn't work, because
|
||||
# devtoolset7 always produces some cxx11 symbols even if we build with old ABI,
|
||||
# and CuDNN always has pre-cxx11 symbols even if we build with new ABI using gcc 5.4.
|
||||
# Instead, we *only* check the above symbols in the following namespaces:
|
||||
LIBTORCH_NAMESPACE_LIST = (
|
||||
"c10::",
|
||||
"at::",
|
||||
"caffe2::",
|
||||
"torch::",
|
||||
)
|
||||
|
||||
|
||||
def _apply_libtorch_symbols(symbols):
|
||||
return [
|
||||
re.compile(f"{x}.*{y}")
|
||||
for (x, y) in itertools.product(LIBTORCH_NAMESPACE_LIST, symbols)
|
||||
]
|
||||
|
||||
|
||||
LIBTORCH_CXX11_PATTERNS = _apply_libtorch_symbols(CXX11_SYMBOLS)
|
||||
|
||||
LIBTORCH_PRE_CXX11_PATTERNS = _apply_libtorch_symbols(PRE_CXX11_SYMBOLS)
|
||||
|
||||
|
||||
@functools.lru_cache(100)
|
||||
def get_symbols(lib: str) -> list[tuple[str, str, str]]:
|
||||
from subprocess import check_output
|
||||
|
||||
lines = check_output(f'nm "{lib}"|c++filt', shell=True)
|
||||
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
|
||||
|
||||
|
||||
def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
|
||||
def _grep_symbols(
|
||||
symbols: list[tuple[str, str, str]], patterns: list[Any]
|
||||
) -> list[str]:
|
||||
rc = []
|
||||
for _s_addr, _s_type, s_name in symbols:
|
||||
for pattern in patterns:
|
||||
if pattern.match(s_name):
|
||||
rc.append(s_name)
|
||||
continue
|
||||
return rc
|
||||
|
||||
all_symbols = get_symbols(lib)
|
||||
num_workers = 32
|
||||
chunk_size = (len(all_symbols) + num_workers - 1) // num_workers
|
||||
|
||||
def _get_symbols_chunk(i):
|
||||
return all_symbols[i * chunk_size : (i + 1) * chunk_size]
|
||||
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=32) as executor:
|
||||
tasks = [
|
||||
executor.submit(_grep_symbols, _get_symbols_chunk(i), patterns)
|
||||
for i in range(num_workers)
|
||||
]
|
||||
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
|
||||
|
||||
|
||||
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
|
||||
print(f"lib: {lib}")
|
||||
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
|
||||
pre_cxx11_symbols = grep_symbols(lib, LIBTORCH_PRE_CXX11_PATTERNS)
|
||||
num_cxx11_symbols = len(cxx11_symbols)
|
||||
num_pre_cxx11_symbols = len(pre_cxx11_symbols)
|
||||
print(f"num_cxx11_symbols: {num_cxx11_symbols}")
|
||||
print(f"num_pre_cxx11_symbols: {num_pre_cxx11_symbols}")
|
||||
if num_pre_cxx11_symbols > 0:
|
||||
raise RuntimeError(
|
||||
f"Found pre-cxx11 symbols, but there shouldn't be any, see: {pre_cxx11_symbols[:100]}"
|
||||
)
|
||||
if num_cxx11_symbols < 100:
|
||||
raise RuntimeError("Didn't find enought cxx11 symbols")
|
||||
|
||||
|
||||
def main() -> None:
|
||||
if "install_root" in os.environ:
|
||||
install_root = Path(os.getenv("install_root")) # noqa: SIM112
|
||||
else:
|
||||
if os.getenv("PACKAGE_TYPE") == "libtorch":
|
||||
install_root = Path(os.getcwd())
|
||||
else:
|
||||
install_root = Path(distutils.sysconfig.get_python_lib()) / "torch"
|
||||
|
||||
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
|
||||
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -1,209 +0,0 @@
|
||||
import argparse
|
||||
|
||||
from torchvision import datasets, transforms
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
from torch.optim.lr_scheduler import StepLR
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super(Net, self).__init__() # noqa: UP008
|
||||
self.conv1 = nn.Conv2d(1, 32, 3, 1)
|
||||
self.conv2 = nn.Conv2d(32, 64, 3, 1)
|
||||
self.dropout1 = nn.Dropout(0.25)
|
||||
self.dropout2 = nn.Dropout(0.5)
|
||||
self.fc1 = nn.Linear(9216, 128)
|
||||
self.fc2 = nn.Linear(128, 10)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = F.relu(x)
|
||||
x = self.conv2(x)
|
||||
x = F.relu(x)
|
||||
x = F.max_pool2d(x, 2)
|
||||
x = self.dropout1(x)
|
||||
x = torch.flatten(x, 1)
|
||||
x = self.fc1(x)
|
||||
x = F.relu(x)
|
||||
x = self.dropout2(x)
|
||||
x = self.fc2(x)
|
||||
output = F.log_softmax(x, dim=1)
|
||||
return output
|
||||
|
||||
|
||||
def train(args, model, device, train_loader, optimizer, epoch):
|
||||
model.train()
|
||||
for batch_idx, (data, target) in enumerate(train_loader):
|
||||
data, target = data.to(device), target.to(device)
|
||||
optimizer.zero_grad()
|
||||
output = model(data)
|
||||
loss = F.nll_loss(output, target)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
if batch_idx % args.log_interval == 0:
|
||||
print(
|
||||
f"Train Epoch: {epoch} "
|
||||
f"[{batch_idx * len(data)}/{len(train_loader.dataset)} "
|
||||
f"({100.0 * batch_idx / len(train_loader):.0f}%)]\tLoss: {loss.item():.6f}"
|
||||
)
|
||||
if args.dry_run:
|
||||
break
|
||||
|
||||
|
||||
def test(model, device, test_loader):
|
||||
model.eval()
|
||||
test_loss = 0
|
||||
correct = 0
|
||||
with torch.no_grad():
|
||||
for data, target in test_loader:
|
||||
data, target = data.to(device), target.to(device)
|
||||
output = model(data)
|
||||
test_loss += F.nll_loss(
|
||||
output, target, reduction="sum"
|
||||
).item() # sum up batch loss
|
||||
pred = output.argmax(
|
||||
dim=1, keepdim=True
|
||||
) # get the index of the max log-probability
|
||||
correct += pred.eq(target.view_as(pred)).sum().item()
|
||||
|
||||
test_loss /= len(test_loader.dataset)
|
||||
|
||||
print(
|
||||
f"\nTest set: Average loss: {test_loss:.4f}, "
|
||||
f"Accuracy: {correct}/{len(test_loader.dataset)} "
|
||||
f"({100.0 * correct / len(test_loader.dataset):.0f}%)\n"
|
||||
)
|
||||
|
||||
|
||||
def timed(fn):
|
||||
start = torch.cuda.Event(enable_timing=True)
|
||||
end = torch.cuda.Event(enable_timing=True)
|
||||
start.record()
|
||||
result = fn()
|
||||
end.record()
|
||||
torch.cuda.synchronize()
|
||||
return result, start.elapsed_time(end) / 1000
|
||||
|
||||
|
||||
def main():
|
||||
# Training settings
|
||||
parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
|
||||
parser.add_argument(
|
||||
"--batch-size",
|
||||
type=int,
|
||||
default=64,
|
||||
metavar="N",
|
||||
help="input batch size for training (default: 64)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--test-batch-size",
|
||||
type=int,
|
||||
default=1000,
|
||||
metavar="N",
|
||||
help="input batch size for testing (default: 1000)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--epochs",
|
||||
type=int,
|
||||
default=4,
|
||||
metavar="N",
|
||||
help="number of epochs to train (default: 14)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--lr",
|
||||
type=float,
|
||||
default=1.0,
|
||||
metavar="LR",
|
||||
help="learning rate (default: 1.0)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--gamma",
|
||||
type=float,
|
||||
default=0.7,
|
||||
metavar="M",
|
||||
help="Learning rate step gamma (default: 0.7)",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-cuda", action="store_true", default=False, help="disables CUDA training"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--no-mps",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="disables macOS GPU training",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="quickly check a single pass",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
|
||||
)
|
||||
parser.add_argument(
|
||||
"--log-interval",
|
||||
type=int,
|
||||
default=100,
|
||||
metavar="N",
|
||||
help="how many batches to wait before logging training status",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--save-model",
|
||||
action="store_true",
|
||||
default=False,
|
||||
help="For Saving the current Model",
|
||||
)
|
||||
args = parser.parse_args()
|
||||
use_cuda = not args.no_cuda and torch.cuda.is_available()
|
||||
use_mps = not args.no_mps and torch.backends.mps.is_available()
|
||||
|
||||
torch.manual_seed(args.seed)
|
||||
torch.backends.cuda.matmul.allow_tf32 = True
|
||||
|
||||
if use_cuda:
|
||||
device = torch.device("cuda")
|
||||
elif use_mps:
|
||||
device = torch.device("mps")
|
||||
else:
|
||||
device = torch.device("cpu")
|
||||
|
||||
train_kwargs = {"batch_size": args.batch_size}
|
||||
test_kwargs = {"batch_size": args.test_batch_size}
|
||||
if use_cuda:
|
||||
cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
|
||||
train_kwargs.update(cuda_kwargs)
|
||||
test_kwargs.update(cuda_kwargs)
|
||||
|
||||
transform = transforms.Compose(
|
||||
[transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
|
||||
)
|
||||
dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)
|
||||
dataset2 = datasets.MNIST("../data", train=False, transform=transform)
|
||||
train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
|
||||
test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)
|
||||
|
||||
model = Net().to(device)
|
||||
opt_model = torch.compile(model, mode="max-autotune")
|
||||
optimizer = optim.Adadelta(opt_model.parameters(), lr=args.lr)
|
||||
|
||||
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
|
||||
for epoch in range(1, args.epochs + 1):
|
||||
print(
|
||||
f"Training Time: {timed(lambda: train(args, opt_model, device, train_loader, optimizer, epoch))[1]}"
|
||||
)
|
||||
print(
|
||||
f"Evaluation Time: {timed(lambda: test(opt_model, device, test_loader))[1]}"
|
||||
)
|
||||
scheduler.step()
|
||||
|
||||
if args.save_model:
|
||||
torch.save(opt_model.state_dict(), "mnist_cnn.pt")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -1,484 +0,0 @@
|
||||
import argparse
|
||||
import importlib
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
from tempfile import NamedTemporaryFile
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch._dynamo
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
|
||||
|
||||
if "MATRIX_GPU_ARCH_VERSION" in os.environ:
|
||||
gpu_arch_ver = os.getenv("MATRIX_GPU_ARCH_VERSION")
|
||||
else:
|
||||
gpu_arch_ver = os.getenv("GPU_ARCH_VERSION") # Use fallback if available
|
||||
gpu_arch_type = os.getenv("MATRIX_GPU_ARCH_TYPE")
|
||||
channel = os.getenv("MATRIX_CHANNEL")
|
||||
package_type = os.getenv("MATRIX_PACKAGE_TYPE")
|
||||
target_os = os.getenv("TARGET_OS", sys.platform)
|
||||
BASE_DIR = Path(__file__).parent.parent.parent
|
||||
|
||||
is_cuda_system = gpu_arch_type == "cuda"
|
||||
NIGHTLY_ALLOWED_DELTA = 3
|
||||
|
||||
MODULES = [
|
||||
{
|
||||
"name": "torchvision",
|
||||
"repo": "https://github.com/pytorch/vision.git",
|
||||
"smoke_test": "./vision/test/smoke_test.py",
|
||||
"extension": "extension",
|
||||
"repo_name": "vision",
|
||||
},
|
||||
{
|
||||
"name": "torchaudio",
|
||||
"repo": "https://github.com/pytorch/audio.git",
|
||||
"smoke_test": "./audio/test/smoke_test/smoke_test.py --no-ffmpeg",
|
||||
"extension": "_extension",
|
||||
"repo_name": "audio",
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
class Net(nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv1 = nn.Conv2d(1, 32, 3, 1)
|
||||
self.conv2 = nn.Conv2d(32, 64, 3, 1)
|
||||
self.fc1 = nn.Linear(9216, 1)
|
||||
|
||||
def forward(self, x):
|
||||
x = self.conv1(x)
|
||||
x = self.conv2(x)
|
||||
x = F.max_pool2d(x, 2)
|
||||
x = torch.flatten(x, 1)
|
||||
output = self.fc1(x)
|
||||
return output
|
||||
|
||||
|
||||
def load_json_from_basedir(filename: str):
|
||||
try:
|
||||
with open(BASE_DIR / filename) as fptr:
|
||||
return json.load(fptr)
|
||||
except FileNotFoundError as exc:
|
||||
raise ImportError(f"File {filename} not found error: {exc.strerror}") from exc
|
||||
except json.JSONDecodeError as exc:
|
||||
raise ImportError(f"Invalid JSON {filename}") from exc
|
||||
|
||||
|
||||
def read_release_matrix():
|
||||
return load_json_from_basedir("release_matrix.json")
|
||||
|
||||
|
||||
def test_numpy():
|
||||
try:
|
||||
import numpy as np
|
||||
|
||||
x = np.arange(5)
|
||||
torch.tensor(x)
|
||||
except ImportError:
|
||||
print("Numpy check skipped. Numpy is not installed.")
|
||||
|
||||
|
||||
def check_version(package: str) -> None:
|
||||
release_version = os.getenv("RELEASE_VERSION")
|
||||
# if release_version is specified, use it to validate the packages
|
||||
if release_version:
|
||||
release_matrix = read_release_matrix()
|
||||
stable_version = release_matrix["torch"]
|
||||
else:
|
||||
stable_version = os.getenv("MATRIX_STABLE_VERSION")
|
||||
|
||||
# only makes sense to check nightly package where dates are known
|
||||
if channel == "nightly":
|
||||
check_nightly_binaries_date(package)
|
||||
elif stable_version is not None:
|
||||
if not torch.__version__.startswith(stable_version):
|
||||
raise RuntimeError(
|
||||
f"Torch version mismatch, expected {stable_version} for channel {channel}. But its {torch.__version__}"
|
||||
)
|
||||
|
||||
if release_version and package == "all":
|
||||
for module in MODULES:
|
||||
imported_module = importlib.import_module(module["name"])
|
||||
module_version = imported_module.__version__
|
||||
if not module_version.startswith(release_matrix[module["name"]]):
|
||||
raise RuntimeError(
|
||||
f"{module['name']} version mismatch, expected: \
|
||||
{release_matrix[module['name']]} for channel {channel}. But its {module_version}"
|
||||
)
|
||||
else:
|
||||
print(
|
||||
f"{module['name']} version actual: {module_version} expected: \
|
||||
{release_matrix[module['name']]} for channel {channel}."
|
||||
)
|
||||
|
||||
else:
|
||||
print(f"Skip version check for channel {channel} as stable version is None")
|
||||
|
||||
|
||||
def check_nightly_binaries_date(package: str) -> None:
|
||||
from datetime import datetime
|
||||
|
||||
format_dt = "%Y%m%d"
|
||||
|
||||
date_t_str = re.findall("dev\\d+", torch.__version__)
|
||||
date_t_delta = datetime.now() - datetime.strptime(date_t_str[0][3:], format_dt)
|
||||
if date_t_delta.days >= NIGHTLY_ALLOWED_DELTA:
|
||||
raise RuntimeError(
|
||||
f"the binaries are from {date_t_str} and are more than {NIGHTLY_ALLOWED_DELTA} days old!"
|
||||
)
|
||||
|
||||
if package == "all":
|
||||
for module in MODULES:
|
||||
imported_module = importlib.import_module(module["name"])
|
||||
module_version = imported_module.__version__
|
||||
date_m_str = re.findall("dev\\d+", module_version)
|
||||
date_m_delta = datetime.now() - datetime.strptime(
|
||||
date_m_str[0][3:], format_dt
|
||||
)
|
||||
print(f"Nightly date check for {module['name']} version {module_version}")
|
||||
if date_m_delta.days > NIGHTLY_ALLOWED_DELTA:
|
||||
raise RuntimeError(
|
||||
f"Expected {module['name']} to be less then {NIGHTLY_ALLOWED_DELTA} days. But its {date_m_delta}"
|
||||
)
|
||||
|
||||
|
||||
def test_cuda_runtime_errors_captured() -> None:
|
||||
cuda_exception_missed = True
|
||||
try:
|
||||
print("Testing test_cuda_runtime_errors_captured")
|
||||
torch._assert_async(torch.tensor(0, device="cuda"))
|
||||
torch._assert_async(torch.tensor(0 + 0j, device="cuda"))
|
||||
except RuntimeError as e:
|
||||
if re.search("CUDA", f"{e}"):
|
||||
print(f"Caught CUDA exception with success: {e}")
|
||||
cuda_exception_missed = False
|
||||
else:
|
||||
raise e
|
||||
if cuda_exception_missed:
|
||||
raise RuntimeError("Expected CUDA RuntimeError but have not received!")
|
||||
|
||||
|
||||
def test_cuda_gds_errors_captured() -> None:
|
||||
major_version = int(torch.version.cuda.split(".")[0])
|
||||
minor_version = int(torch.version.cuda.split(".")[1])
|
||||
|
||||
if target_os == "windows":
|
||||
print(f"{target_os} is not supported for GDS smoke test")
|
||||
return
|
||||
|
||||
if major_version < 12 or (major_version == 12 and minor_version < 6):
|
||||
print("CUDA version is not supported for GDS smoke test")
|
||||
return
|
||||
|
||||
cuda_exception_missed = True
|
||||
try:
|
||||
print("Testing test_cuda_gds_errors_captured")
|
||||
with NamedTemporaryFile() as f:
|
||||
torch.cuda.gds.GdsFile(f.name, os.O_CREAT | os.O_RDWR)
|
||||
except RuntimeError as e:
|
||||
expected_error = "cuFileHandleRegister failed"
|
||||
if re.search(expected_error, f"{e}"):
|
||||
print(f"Caught CUDA exception with success: {e}")
|
||||
cuda_exception_missed = False
|
||||
else:
|
||||
raise e
|
||||
if cuda_exception_missed:
|
||||
raise RuntimeError(
|
||||
"Expected cuFileHandleRegister failed RuntimeError but have not received!"
|
||||
)
|
||||
|
||||
|
||||
def find_pypi_package_version(package: str) -> Optional[str]:
|
||||
from importlib import metadata
|
||||
|
||||
dists = metadata.distributions()
|
||||
for dist in dists:
|
||||
if dist.metadata["Name"].startswith(package):
|
||||
return dist.version
|
||||
return None
|
||||
|
||||
|
||||
def cudnn_to_version_str(cudnn_version: int) -> str:
|
||||
patch = int(cudnn_version % 10)
|
||||
minor = int((cudnn_version / 100) % 100)
|
||||
major = int((cudnn_version / 10000) % 10000)
|
||||
return f"{major}.{minor}.{patch}"
|
||||
|
||||
|
||||
def compare_pypi_to_torch_versions(
|
||||
package: str, pypi_version: str, torch_version: str
|
||||
) -> None:
|
||||
if pypi_version is None:
|
||||
raise RuntimeError(f"Can't find {package} in PyPI for Torch: {torch_version}")
|
||||
if pypi_version.startswith(torch_version):
|
||||
print(f"Found matching {package}. Torch: {torch_version} PyPI {pypi_version}")
|
||||
else:
|
||||
raise RuntimeError(
|
||||
f"Wrong {package} version. Torch: {torch_version} PyPI: {pypi_version}"
|
||||
)
|
||||
|
||||
|
||||
def smoke_test_cuda(
|
||||
package: str,
|
||||
runtime_error_check: str,
|
||||
torch_compile_check: str,
|
||||
pypi_pkg_check: str,
|
||||
) -> None:
|
||||
if not torch.cuda.is_available() and is_cuda_system:
|
||||
raise RuntimeError(f"Expected CUDA {gpu_arch_ver}. However CUDA is not loaded.")
|
||||
|
||||
if package == "all" and is_cuda_system:
|
||||
for module in MODULES:
|
||||
imported_module = importlib.import_module(module["name"])
|
||||
# TBD for vision move extension module to private so it will
|
||||
# be _extention.
|
||||
version = "N/A"
|
||||
if module["extension"] == "extension":
|
||||
version = imported_module.extension._check_cuda_version()
|
||||
else:
|
||||
version = imported_module._extension._check_cuda_version()
|
||||
print(f"{module['name']} CUDA: {version}")
|
||||
|
||||
# torch.compile is available on macos-arm64 and Linux for python 3.8-3.13
|
||||
if (
|
||||
torch_compile_check == "enabled"
|
||||
and sys.version_info < (3, 14, 0)
|
||||
and target_os in ["linux", "linux-aarch64", "macos-arm64", "darwin"]
|
||||
):
|
||||
smoke_test_compile("cuda" if torch.cuda.is_available() else "cpu")
|
||||
|
||||
if torch.cuda.is_available():
|
||||
if torch.version.cuda != gpu_arch_ver:
|
||||
raise RuntimeError(
|
||||
f"Wrong CUDA version. Loaded: {torch.version.cuda} Expected: {gpu_arch_ver}"
|
||||
)
|
||||
|
||||
print(f"torch cuda: {torch.version.cuda}")
|
||||
torch.cuda.init()
|
||||
print("CUDA initialized successfully")
|
||||
print(f"Number of CUDA devices: {torch.cuda.device_count()}")
|
||||
for i in range(torch.cuda.device_count()):
|
||||
print(f"Device {i}: {torch.cuda.get_device_name(i)}")
|
||||
|
||||
print(f"cuDNN enabled? {torch.backends.cudnn.enabled}")
|
||||
torch_cudnn_version = cudnn_to_version_str(torch.backends.cudnn.version())
|
||||
print(f"Torch cuDNN version: {torch_cudnn_version}")
|
||||
|
||||
if sys.platform in ["linux", "linux2"]:
|
||||
torch_nccl_version = ".".join(str(v) for v in torch.cuda.nccl.version())
|
||||
print(f"Torch nccl; version: {torch_nccl_version}")
|
||||
|
||||
# Pypi dependencies are installed on linux ony and nccl is availbale only on Linux.
|
||||
if pypi_pkg_check == "enabled" and sys.platform in ["linux", "linux2"]:
|
||||
compare_pypi_to_torch_versions(
|
||||
"cudnn", find_pypi_package_version("nvidia-cudnn"), torch_cudnn_version
|
||||
)
|
||||
compare_pypi_to_torch_versions(
|
||||
"nccl", find_pypi_package_version("nvidia-nccl"), torch_nccl_version
|
||||
)
|
||||
|
||||
if runtime_error_check == "enabled":
|
||||
test_cuda_runtime_errors_captured()
|
||||
|
||||
|
||||
def smoke_test_conv2d() -> None:
|
||||
import torch.nn as nn
|
||||
|
||||
print("Testing smoke_test_conv2d")
|
||||
# With square kernels and equal stride
|
||||
m = nn.Conv2d(16, 33, 3, stride=2)
|
||||
# non-square kernels and unequal stride and with padding
|
||||
m = nn.Conv2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
|
||||
assert m is not None
|
||||
# non-square kernels and unequal stride and with padding and dilation
|
||||
basic_conv = nn.Conv2d(
|
||||
16, 33, (3, 5), stride=(2, 1), padding=(4, 2), dilation=(3, 1)
|
||||
)
|
||||
input = torch.randn(20, 16, 50, 100)
|
||||
output = basic_conv(input)
|
||||
|
||||
if is_cuda_system:
|
||||
print("Testing smoke_test_conv2d with cuda")
|
||||
conv = nn.Conv2d(3, 3, 3).cuda()
|
||||
x = torch.randn(1, 3, 24, 24, device="cuda")
|
||||
with torch.cuda.amp.autocast():
|
||||
out = conv(x)
|
||||
assert out is not None
|
||||
|
||||
supported_dtypes = [torch.float16, torch.float32, torch.float64]
|
||||
for dtype in supported_dtypes:
|
||||
print(f"Testing smoke_test_conv2d with cuda for {dtype}")
|
||||
conv = basic_conv.to(dtype).cuda()
|
||||
input = torch.randn(20, 16, 50, 100, device="cuda").type(dtype)
|
||||
output = conv(input)
|
||||
assert output is not None
|
||||
|
||||
|
||||
def test_linalg(device="cpu") -> None:
|
||||
print(f"Testing smoke_test_linalg on {device}")
|
||||
A = torch.randn(5, 3, device=device)
|
||||
U, S, Vh = torch.linalg.svd(A, full_matrices=False)
|
||||
assert (
|
||||
U.shape == A.shape
|
||||
and S.shape == torch.Size([3])
|
||||
and Vh.shape == torch.Size([3, 3])
|
||||
)
|
||||
torch.dist(A, U @ torch.diag(S) @ Vh)
|
||||
|
||||
U, S, Vh = torch.linalg.svd(A)
|
||||
assert (
|
||||
U.shape == torch.Size([5, 5])
|
||||
and S.shape == torch.Size([3])
|
||||
and Vh.shape == torch.Size([3, 3])
|
||||
)
|
||||
torch.dist(A, U[:, :3] @ torch.diag(S) @ Vh)
|
||||
|
||||
A = torch.randn(7, 5, 3, device=device)
|
||||
U, S, Vh = torch.linalg.svd(A, full_matrices=False)
|
||||
torch.dist(A, U @ torch.diag_embed(S) @ Vh)
|
||||
|
||||
if device == "cuda":
|
||||
supported_dtypes = [torch.float32, torch.float64]
|
||||
for dtype in supported_dtypes:
|
||||
print(f"Testing smoke_test_linalg with cuda for {dtype}")
|
||||
A = torch.randn(20, 16, 50, 100, device=device, dtype=dtype)
|
||||
torch.linalg.svd(A)
|
||||
|
||||
|
||||
def smoke_test_compile(device: str = "cpu") -> None:
|
||||
supported_dtypes = [torch.float16, torch.float32, torch.float64]
|
||||
|
||||
def foo(x: torch.Tensor) -> torch.Tensor:
|
||||
return torch.sin(x) + torch.cos(x)
|
||||
|
||||
for dtype in supported_dtypes:
|
||||
print(f"Testing smoke_test_compile for {device} and {dtype}")
|
||||
x = torch.rand(3, 3, device=device).type(dtype)
|
||||
x_eager = foo(x)
|
||||
x_pt2 = torch.compile(foo)(x)
|
||||
torch.testing.assert_close(x_eager, x_pt2)
|
||||
|
||||
# Check that SIMD were detected for the architecture
|
||||
if device == "cpu":
|
||||
from torch._inductor.codecache import pick_vec_isa
|
||||
|
||||
isa = pick_vec_isa()
|
||||
if not isa:
|
||||
raise RuntimeError("Can't detect vectorized ISA for CPU")
|
||||
print(f"Picked CPU ISA {type(isa).__name__} bit width {isa.bit_width()}")
|
||||
|
||||
# Reset torch dynamo since we are changing mode
|
||||
torch._dynamo.reset()
|
||||
dtype = torch.float32
|
||||
torch.set_float32_matmul_precision("high")
|
||||
print(f"Testing smoke_test_compile with mode 'max-autotune' for {dtype}")
|
||||
x = torch.rand(64, 1, 28, 28, device=device).type(torch.float32)
|
||||
model = Net().to(device=device)
|
||||
x_pt2 = torch.compile(model, mode="max-autotune")(x)
|
||||
|
||||
|
||||
def smoke_test_modules():
|
||||
cwd = os.getcwd()
|
||||
for module in MODULES:
|
||||
if module["repo"]:
|
||||
if not os.path.exists(f"{cwd}/{module['repo_name']}"):
|
||||
print(f"Path does not exist: {cwd}/{module['repo_name']}")
|
||||
try:
|
||||
subprocess.check_output(
|
||||
f"git clone --depth 1 {module['repo']}",
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
raise RuntimeError(
|
||||
f"Cloning {module['repo']} FAIL: {exc.returncode} Output: {exc.output}"
|
||||
) from exc
|
||||
try:
|
||||
smoke_test_command = f"python3 {module['smoke_test']}"
|
||||
if target_os == "windows":
|
||||
smoke_test_command = f"python {module['smoke_test']}"
|
||||
output = subprocess.check_output(
|
||||
smoke_test_command,
|
||||
stderr=subprocess.STDOUT,
|
||||
shell=True,
|
||||
universal_newlines=True,
|
||||
)
|
||||
except subprocess.CalledProcessError as exc:
|
||||
raise RuntimeError(
|
||||
f"Module {module['name']} FAIL: {exc.returncode} Output: {exc.output}"
|
||||
) from exc
|
||||
else:
|
||||
print(f"Output: \n{output}\n")
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--package",
|
||||
help="Package to include in smoke testing",
|
||||
type=str,
|
||||
choices=["all", "torchonly"],
|
||||
default="all",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--runtime-error-check",
|
||||
help="No Runtime Error check",
|
||||
type=str,
|
||||
choices=["enabled", "disabled"],
|
||||
default="enabled",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--torch-compile-check",
|
||||
help="Check torch compile",
|
||||
type=str,
|
||||
choices=["enabled", "disabled"],
|
||||
default="enabled",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--pypi-pkg-check",
|
||||
help="Check pypi package versions cudnn and nccl",
|
||||
type=str,
|
||||
choices=["enabled", "disabled"],
|
||||
default="enabled",
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main() -> None:
|
||||
options = parse_args()
|
||||
print(f"torch: {torch.__version__}")
|
||||
print(torch.__config__.parallel_info())
|
||||
# All PyTorch binary builds should be built with OpenMP
|
||||
if not torch.backends.openmp.is_available():
|
||||
raise RuntimeError("PyTorch must be built with OpenMP support")
|
||||
|
||||
check_version(options.package)
|
||||
smoke_test_conv2d()
|
||||
test_linalg()
|
||||
test_numpy()
|
||||
|
||||
if is_cuda_system:
|
||||
test_linalg("cuda")
|
||||
test_cuda_gds_errors_captured()
|
||||
|
||||
if options.package == "all":
|
||||
smoke_test_modules()
|
||||
|
||||
smoke_test_cuda(
|
||||
options.package,
|
||||
options.runtime_error_check,
|
||||
options.torch_compile_check,
|
||||
options.pypi_pkg_check,
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@ -4,7 +4,7 @@
|
||||
# (This is set by default in the Docker images we build, so you don't
|
||||
# need to set it yourself.
|
||||
|
||||
set -ex -o pipefail
|
||||
set -ex
|
||||
|
||||
# Suppress ANSI color escape sequences
|
||||
export TERM=vt100
|
||||
@ -12,9 +12,9 @@ export TERM=vt100
|
||||
# shellcheck source=./common.sh
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
|
||||
|
||||
# Do not change workspace permissions for ROCm and s390x CI jobs
|
||||
# Do not change workspace permissions for ROCm CI jobs
|
||||
# as it can leave workspace with bad permissions for cancelled jobs
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
|
||||
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
|
||||
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
|
||||
cleanup_workspace() {
|
||||
@ -46,12 +46,9 @@ BUILD_BIN_DIR="$BUILD_DIR"/bin
|
||||
SHARD_NUMBER="${SHARD_NUMBER:=1}"
|
||||
NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"
|
||||
|
||||
# enable debug asserts in serialization
|
||||
export TORCH_SERIALIZATION_DEBUG=1
|
||||
|
||||
export VALGRIND=ON
|
||||
# export TORCH_INDUCTOR_INSTALL_GXX=ON
|
||||
if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
|
||||
# clang9 appears to miscompile code involving std::optional<c10::SymInt>,
|
||||
# such that valgrind complains along these lines:
|
||||
#
|
||||
@ -89,13 +86,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
export VALGRIND=OFF
|
||||
fi
|
||||
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
|
||||
# There are additional warnings on s390x, maybe due to newer gcc.
|
||||
# Skip this check for now
|
||||
export VALGRIND=OFF
|
||||
fi
|
||||
|
||||
if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]] || [[ "${CONTINUE_THROUGH_ERROR}" == "1" ]]; then
|
||||
# When rerunning disable tests, do not generate core dumps as it could consume
|
||||
# the runner disk space when crashed tests are run multiple times. Running out
|
||||
@ -139,7 +129,7 @@ if [[ "$TEST_CONFIG" == 'default' ]]; then
|
||||
fi
|
||||
|
||||
if [[ "$TEST_CONFIG" == 'distributed' ]] && [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
export HIP_VISIBLE_DEVICES=0,1,2,3
|
||||
export HIP_VISIBLE_DEVICES=0,1
|
||||
fi
|
||||
|
||||
if [[ "$TEST_CONFIG" == 'slow' ]]; then
|
||||
@ -163,8 +153,6 @@ elif [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
export PYTORCH_TESTING_DEVICE_ONLY_FOR="xpu"
|
||||
# setting PYTHON_TEST_EXTRA_OPTION
|
||||
export PYTHON_TEST_EXTRA_OPTION="--xpu"
|
||||
# Disable sccache for xpu test due to flaky issue https://github.com/pytorch/pytorch/issues/143585
|
||||
sudo rm -rf /opt/cache
|
||||
fi
|
||||
|
||||
if [[ "$TEST_CONFIG" == *crossref* ]]; then
|
||||
@ -177,9 +165,6 @@ if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
|
||||
# Print GPU info
|
||||
rocminfo
|
||||
rocminfo | grep -E 'Name:.*\sgfx|Marketing'
|
||||
|
||||
# for benchmarks/dynamo/check_accuracy.py, we need to put results in a rocm specific directory to avoid clashes with cuda
|
||||
MAYBE_ROCM="rocm/"
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
||||
@ -314,13 +299,6 @@ test_python() {
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
test_lazy_tensor_meta_reference_disabled() {
|
||||
export TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE=1
|
||||
echo "Testing lazy tensor operations without meta reference"
|
||||
time python test/run_test.py --include lazy/test_ts_opinfo.py --verbose
|
||||
export -n TORCH_DISABLE_FUNCTIONALIZATION_META_REFERENCE
|
||||
}
|
||||
|
||||
|
||||
test_dynamo_wrapped_shard() {
|
||||
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
||||
@ -335,7 +313,6 @@ test_dynamo_wrapped_shard() {
|
||||
--exclude-jit-executor \
|
||||
--exclude-distributed-tests \
|
||||
--exclude-torch-export-tests \
|
||||
--exclude-aot-dispatch-tests \
|
||||
--shard "$1" "$NUM_TEST_SHARDS" \
|
||||
--verbose \
|
||||
--upload-artifacts-while-running
|
||||
@ -349,7 +326,7 @@ test_inductor_distributed() {
|
||||
python test/run_test.py -i inductor/test_aot_inductor.py -k test_non_default_cuda_device --verbose
|
||||
python test/run_test.py -i inductor/test_aot_inductor.py -k test_replicate_on_devices --verbose
|
||||
python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
|
||||
python test/run_test.py -i distributed/tensor/test_dtensor_compile.py --verbose
|
||||
python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
|
||||
python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
|
||||
python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
|
||||
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
|
||||
@ -402,32 +379,15 @@ test_inductor_aoti() {
|
||||
CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
|
||||
}
|
||||
|
||||
test_inductor_cpp_wrapper_shard() {
|
||||
if [[ -z "$NUM_TEST_SHARDS" ]]; then
|
||||
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
test_inductor_cpp_wrapper() {
|
||||
export TORCHINDUCTOR_CPP_WRAPPER=1
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
if [[ "$1" -eq "2" ]]; then
|
||||
# For now, manually put the opinfo tests in shard 2, and all other tests in
|
||||
# shard 1. Test specific things triggering past bugs, for now.
|
||||
python test/run_test.py \
|
||||
--include inductor/test_torchinductor_opinfo \
|
||||
-k 'linalg or to_sparse' \
|
||||
--verbose
|
||||
exit
|
||||
fi
|
||||
# Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor
|
||||
# unit tests with cpp wrapper.
|
||||
python test/run_test.py --include inductor/test_torchinductor.py --verbose
|
||||
|
||||
# Run certain inductor unit tests with cpp wrapper. In the end state, we
|
||||
# should be able to run all the inductor unit tests with cpp_wrapper.
|
||||
python test/run_test.py \
|
||||
--include inductor/test_torchinductor inductor/test_max_autotune inductor/test_cpu_repro \
|
||||
--verbose
|
||||
python test/run_test.py --inductor --include test_torch -k 'take' --verbose
|
||||
|
||||
# Run inductor benchmark tests with cpp wrapper.
|
||||
# Skip benchmark tests if it's in rerun-disabled-mode.
|
||||
@ -440,7 +400,7 @@ test_inductor_cpp_wrapper_shard() {
|
||||
--output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_timm_training.csv"
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
|
||||
|
||||
python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
|
||||
--bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
|
||||
@ -450,7 +410,7 @@ test_inductor_cpp_wrapper_shard() {
|
||||
--bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_torchbench_inference.csv"
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
|
||||
fi
|
||||
}
|
||||
|
||||
@ -483,8 +443,6 @@ elif [[ "${TEST_CONFIG}" == *aot_eager* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--backend aot_eager)
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--export-aot-inductor)
|
||||
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--inductor --inductor-compile-mode max-autotune)
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* && "${TEST_CONFIG}" != *perf* ]]; then
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--inductor)
|
||||
fi
|
||||
@ -499,59 +457,6 @@ else
|
||||
DYNAMO_BENCHMARK_FLAGS+=(--device cuda)
|
||||
fi
|
||||
|
||||
test_cachebench() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
|
||||
local BENCHMARK
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
local BENCHMARK=torchbench
|
||||
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
|
||||
local BENCHMARK=huggingface
|
||||
else
|
||||
echo "invalid SHARD_NUMBER: ${SHARD_NUMBER}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
local mode_options=("training" "inference")
|
||||
|
||||
for mode in "${mode_options[@]}"; do
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode "$mode" \
|
||||
--device cuda \
|
||||
--benchmark "$BENCHMARK" \
|
||||
--repeat 3 \
|
||||
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}.json"
|
||||
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode "$mode" \
|
||||
--dynamic \
|
||||
--device cuda \
|
||||
--benchmark "$BENCHMARK" \
|
||||
--repeat 3 \
|
||||
--output "$TEST_REPORTS_DIR/cachebench_${BENCHMARK}_${mode}_dynamic.json"
|
||||
done
|
||||
}
|
||||
|
||||
test_verify_cachebench() {
|
||||
TMP_TEST_REPORTS_DIR=$(mktemp -d)
|
||||
TEST_OUTPUT="$TMP_TEST_REPORTS_DIR/test.json"
|
||||
|
||||
$TASKSET python "benchmarks/dynamo/cachebench.py" \
|
||||
--mode training \
|
||||
--device cpu \
|
||||
--model nanogpt \
|
||||
--benchmark torchbench \
|
||||
--output "$TEST_OUTPUT"
|
||||
|
||||
# -s checks file exists and is non empty
|
||||
if [[ ! -s "$TEST_OUTPUT" ]]; then
|
||||
echo "Cachebench failed to produce an output."
|
||||
echo "Run 'python benchmarks/dynamo/cachebench.py' to make sure it works"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
test_perf_for_dashboard() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
@ -580,10 +485,6 @@ test_perf_for_dashboard() {
|
||||
test_inductor_set_cpu_affinity
|
||||
elif [[ "${TEST_CONFIG}" == *cuda_a10g* ]]; then
|
||||
device=cuda_a10g
|
||||
elif [[ "${TEST_CONFIG}" == *h100* ]]; then
|
||||
device=cuda_h100
|
||||
elif [[ "${TEST_CONFIG}" == *rocm* ]]; then
|
||||
device=rocm
|
||||
fi
|
||||
|
||||
for mode in "${modes[@]}"; do
|
||||
@ -599,6 +500,7 @@ test_perf_for_dashboard() {
|
||||
elif [[ "$target" == "accuracy" ]]; then
|
||||
target_flag+=( --no-translation-validation)
|
||||
fi
|
||||
target_flag+=( --only hf_T5)
|
||||
|
||||
if [[ "$DASHBOARD_TAG" == *default-true* ]]; then
|
||||
$TASKSET python "benchmarks/dynamo/$suite.py" \
|
||||
@ -616,7 +518,7 @@ test_perf_for_dashboard() {
|
||||
--dynamic-batch-only "$@" \
|
||||
--output "$TEST_REPORTS_DIR/${backend}_dynamic_${suite}_${dtype}_${mode}_${device}_${target}.csv"
|
||||
fi
|
||||
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]]; then
|
||||
if [[ "$DASHBOARD_TAG" == *cppwrapper-true* ]] && [[ "$mode" == "inference" ]]; then
|
||||
TORCHINDUCTOR_CPP_WRAPPER=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
|
||||
"${target_flag[@]}" --"$mode" --"$dtype" --backend "$backend" --disable-cudagraphs "$@" \
|
||||
--output "$TEST_REPORTS_DIR/${backend}_cpp_wrapper_${suite}_${dtype}_${mode}_${device}_${target}.csv"
|
||||
@ -700,25 +602,20 @@ test_single_dynamo_benchmark() {
|
||||
TEST_CONFIG=${TEST_CONFIG//_avx512/}
|
||||
fi
|
||||
python "benchmarks/dynamo/$suite.py" \
|
||||
--ci --accuracy --timing --explain --print-compilation-time \
|
||||
--ci --accuracy --timing --explain \
|
||||
"${DYNAMO_BENCHMARK_FLAGS[@]}" \
|
||||
"$@" "${partition_flags[@]}" \
|
||||
--output "$TEST_REPORTS_DIR/${name}_${suite}.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv"
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
python benchmarks/dynamo/check_graph_breaks.py \
|
||||
--actual "$TEST_REPORTS_DIR/${name}_$suite.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}${TEST_CONFIG}_${name}.csv"
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${TEST_CONFIG}_${name}.csv"
|
||||
fi
|
||||
}
|
||||
|
||||
test_inductor_micro_benchmark() {
|
||||
# torchao requires cuda 8.0 or above for bfloat16 support
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
export TORCH_CUDA_ARCH_LIST="8.0;8.6"
|
||||
fi
|
||||
install_torchao
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||
test_inductor_set_cpu_affinity
|
||||
@ -732,7 +629,7 @@ test_inductor_halide() {
|
||||
}
|
||||
|
||||
test_inductor_triton_cpu() {
|
||||
python test/run_test.py --include inductor/test_triton_cpu_backend.py inductor/test_torchinductor_strided_blocks.py --verbose
|
||||
python test/run_test.py --include inductor/test_triton_cpu_backend.py --verbose
|
||||
assert_git_not_dirty
|
||||
}
|
||||
|
||||
@ -762,8 +659,6 @@ test_dynamo_benchmark() {
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *aot_inductor* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
elif [[ "${TEST_CONFIG}" == *max_autotune_inductor* ]]; then
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
else
|
||||
test_single_dynamo_benchmark "inference" "$suite" "$shard_id" --inference --bfloat16 "$@"
|
||||
test_single_dynamo_benchmark "training" "$suite" "$shard_id" --training --amp "$@"
|
||||
@ -798,7 +693,7 @@ test_inductor_torchbench_smoketest_perf() {
|
||||
--only $test --output "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv"
|
||||
python benchmarks/dynamo/check_accuracy.py \
|
||||
--actual "$TEST_REPORTS_DIR/inductor_warm_start_smoketest_$test.csv" \
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/${MAYBE_ROCM}inductor_huggingface_training.csv"
|
||||
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_huggingface_training.csv"
|
||||
done
|
||||
}
|
||||
|
||||
@ -994,20 +889,10 @@ test_libtorch_api() {
|
||||
else
|
||||
# Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
|
||||
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
|
||||
|
||||
# On s390x, pytorch is built without llvm.
|
||||
# Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
|
||||
# test fails with errors like:
|
||||
# JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
|
||||
# unknown file: Failure
|
||||
# C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
|
||||
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
|
||||
fi
|
||||
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
|
||||
fi
|
||||
|
||||
# quantization is not fully supported on s390x yet
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* && "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
|
||||
if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* && "${BUILD_ENVIRONMENT}" != *asan* ]]; then
|
||||
# NB: This test is not under TORCH_BIN_DIR but under BUILD_BIN_DIR
|
||||
export CPP_TESTS_DIR="${BUILD_BIN_DIR}"
|
||||
python test/run_test.py --cpp --verbose -i cpp/static_runtime_test
|
||||
@ -1068,9 +953,6 @@ test_distributed() {
|
||||
python test/run_test.py --cpp --verbose -i cpp/HashStoreTest
|
||||
python test/run_test.py --cpp --verbose -i cpp/TCPStoreTest
|
||||
|
||||
echo "Testing multi-GPU linalg tests"
|
||||
python test/run_test.py -i test_linalg.py -k test_matmul_offline_mgpu_tunable --verbose
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
MPIEXEC=$(command -v mpiexec)
|
||||
if [[ -n "$MPIEXEC" ]]; then
|
||||
@ -1173,7 +1055,7 @@ build_xla() {
|
||||
apply_patches
|
||||
SITE_PACKAGES="$(python -c 'from distutils.sysconfig import get_python_lib; print(get_python_lib())')"
|
||||
# These functions are defined in .circleci/common.sh in pytorch/xla repo
|
||||
retry install_pre_deps_pytorch_xla $XLA_DIR $USE_CACHE
|
||||
retry install_deps_pytorch_xla $XLA_DIR $USE_CACHE
|
||||
CMAKE_PREFIX_PATH="${SITE_PACKAGES}/torch:${CMAKE_PREFIX_PATH}" XLA_SANDBOX_BUILD=1 build_torch_xla $XLA_DIR
|
||||
assert_git_not_dirty
|
||||
}
|
||||
@ -1354,7 +1236,7 @@ EOF
|
||||
}
|
||||
|
||||
test_bazel() {
|
||||
set -e -o pipefail
|
||||
set -e
|
||||
|
||||
# bazel test needs sccache setup.
|
||||
# shellcheck source=./common-build.sh
|
||||
@ -1474,13 +1356,14 @@ test_executorch() {
|
||||
pushd /executorch
|
||||
|
||||
export PYTHON_EXECUTABLE=python
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
export EXECUTORCH_BUILD_PYBIND=ON
|
||||
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"
|
||||
|
||||
# For llama3
|
||||
bash examples/models/llama3_2_vision/install_requirements.sh
|
||||
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
|
||||
# from the PR
|
||||
bash .ci/scripts/setup-linux.sh --build-tool cmake
|
||||
bash .ci/scripts/setup-linux.sh cmake
|
||||
|
||||
echo "Run ExecuTorch unit tests"
|
||||
pytest -v -n auto
|
||||
@ -1503,8 +1386,7 @@ test_executorch() {
|
||||
|
||||
test_linux_aarch64() {
|
||||
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
|
||||
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
|
||||
test_foreach test_reductions test_unary_ufuncs test_tensor_creation_ops test_ops \
|
||||
test_transformers test_multiprocessing test_numpy_interop \
|
||||
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||
|
||||
# Dynamo tests
|
||||
@ -1522,39 +1404,17 @@ test_linux_aarch64() {
|
||||
inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
|
||||
inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
|
||||
inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
|
||||
inductor/test_triton_cpu_backend inductor/test_triton_extension_backend inductor/test_mkldnn_pattern_matcher inductor/test_cpu_cpp_wrapper \
|
||||
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
|
||||
}
|
||||
|
||||
test_operator_benchmark() {
|
||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
||||
mkdir -p "$TEST_REPORTS_DIR"
|
||||
TEST_DIR=$(pwd)
|
||||
|
||||
test_inductor_set_cpu_affinity
|
||||
|
||||
cd benchmarks/operator_benchmark/pt_extension
|
||||
python setup.py install
|
||||
|
||||
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
||||
$TASKSET python -m benchmark_all_test --device "$1" --tag-filter "$2" \
|
||||
--output-dir "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv"
|
||||
|
||||
pip_install pandas
|
||||
python check_perf_csv.py \
|
||||
--actual "${TEST_REPORTS_DIR}/operator_benchmark_eager_float32_cpu.csv" \
|
||||
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
||||
}
|
||||
|
||||
|
||||
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
|
||||
(cd test && python -c "import torch; print(torch.__config__.show())")
|
||||
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
|
||||
fi
|
||||
if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then
|
||||
# Install numpy-2.0.2 and compatible scipy & numba versions
|
||||
python -mpip install --pre numpy==2.0.2 scipy==1.13.1 numba==0.60.0
|
||||
python test/run_test.py --include dynamo/test_functions.py dynamo/test_unspec.py test_binary_ufuncs.py test_fake_tensor.py test_linalg.py test_numpy_interop.py test_tensor_creation_ops.py test_torch.py torch_np/test_basic.py
|
||||
# Install numpy-2.0.2 and test inductor tracing
|
||||
python -mpip install --pre numpy==2.0.2
|
||||
python test/run_test.py --include dynamo/test_unspec.py
|
||||
elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
|
||||
test_linux_aarch64
|
||||
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
|
||||
@ -1577,19 +1437,6 @@ elif [[ "$TEST_CONFIG" == distributed ]]; then
|
||||
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||
test_rpc
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
|
||||
TEST_MODE="short"
|
||||
|
||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *long* ]]; then
|
||||
TEST_MODE="long"
|
||||
elif [[ "${TEST_CONFIG}" == *all* ]]; then
|
||||
TEST_MODE="all"
|
||||
fi
|
||||
|
||||
test_operator_benchmark cpu ${TEST_MODE}
|
||||
|
||||
fi
|
||||
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
|
||||
test_inductor_distributed
|
||||
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
||||
@ -1606,16 +1453,6 @@ elif [[ "${TEST_CONFIG}" == *timm* ]]; then
|
||||
install_torchvision
|
||||
id=$((SHARD_NUMBER-1))
|
||||
test_dynamo_benchmark timm_models "$id"
|
||||
elif [[ "${TEST_CONFIG}" == cachebench ]]; then
|
||||
install_torchaudio cuda
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt BERT_pytorch resnet50 hf_T5 llama moco
|
||||
PYTHONPATH=$(pwd)/torchbench test_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
|
||||
install_torchaudio cpu
|
||||
install_torchvision
|
||||
checkout_install_torchbench nanogpt
|
||||
PYTHONPATH=$(pwd)/torchbench test_verify_cachebench
|
||||
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
|
||||
install_torchaudio cpu
|
||||
@ -1651,8 +1488,7 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
||||
install_torchaudio cuda
|
||||
install_torchvision
|
||||
checkout_install_torchbench hf_T5 llama moco
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper_shard "$SHARD_NUMBER"
|
||||
test_inductor_aoti
|
||||
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper
|
||||
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
||||
install_torchvision
|
||||
test_inductor_shard "${SHARD_NUMBER}"
|
||||
@ -1672,7 +1508,6 @@ elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then
|
||||
test_python_shard "$SHARD_NUMBER"
|
||||
test_aten
|
||||
elif [[ "${SHARD_NUMBER}" == 1 && $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||
test_lazy_tensor_meta_reference_disabled
|
||||
test_without_numpy
|
||||
install_torchvision
|
||||
test_python_shard 1
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
|
||||
project(simple-torch-test)
|
||||
|
||||
find_package(Torch REQUIRED)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_CXX_FLAGS}")
|
||||
|
||||
|
||||
add_executable(simple-torch-test simple-torch-test.cpp)
|
||||
target_include_directories(simple-torch-test PRIVATE ${TORCH_INCLUDE_DIRS})
|
||||
target_link_libraries(simple-torch-test "${TORCH_LIBRARIES}")
|
||||
set_property(TARGET simple-torch-test PROPERTY CXX_STANDARD 17)
|
||||
|
||||
find_package(CUDAToolkit 11.8)
|
||||
|
||||
target_link_libraries(simple-torch-test CUDA::cudart CUDA::cufft CUDA::cusparse CUDA::cublas CUDA::cusolver)
|
||||
find_library(CUDNN_LIBRARY NAMES cudnn)
|
||||
target_link_libraries(simple-torch-test ${CUDNN_LIBRARY} )
|
||||
if(MSVC)
|
||||
file(GLOB TORCH_DLLS "$ENV{CUDA_PATH}/bin/cudnn64_8.dll" "$ENV{NVTOOLSEXT_PATH}/bin/x64/*.dll")
|
||||
message("dlls to copy " ${TORCH_DLLS})
|
||||
add_custom_command(TARGET simple-torch-test
|
||||
POST_BUILD
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_if_different
|
||||
${TORCH_DLLS}
|
||||
$<TARGET_FILE_DIR:simple-torch-test>)
|
||||
endif(MSVC)
|
||||
@ -1,15 +0,0 @@
|
||||
#include <torch/torch.h>
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
std::cout << "Checking that CUDA archs are setup correctly" << std::endl;
|
||||
TORCH_CHECK(torch::rand({ 3, 5 }, torch::Device(torch::kCUDA)).defined(), "CUDA archs are not setup correctly");
|
||||
|
||||
// These have to run after CUDA is initialized
|
||||
|
||||
std::cout << "Checking that magma is available" << std::endl;
|
||||
TORCH_CHECK(torch::hasMAGMA(), "MAGMA is not available");
|
||||
|
||||
std::cout << "Checking that CuDNN is available" << std::endl;
|
||||
TORCH_CHECK(torch::cuda::cudnn_is_available(), "CuDNN is not available");
|
||||
return 0;
|
||||
}
|
||||
@ -1,6 +0,0 @@
|
||||
#include <torch/torch.h>
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
TORCH_CHECK(torch::hasMKL(), "MKL is not available");
|
||||
return 0;
|
||||
}
|
||||
@ -1,7 +0,0 @@
|
||||
#include <ATen/ATen.h>
|
||||
#include <torch/torch.h>
|
||||
|
||||
int main(int argc, const char* argv[]) {
|
||||
TORCH_CHECK(at::globalContext().isXNNPACKAvailable(), "XNNPACK is not available");
|
||||
return 0;
|
||||
}
|
||||
@ -1,38 +0,0 @@
|
||||
r"""
|
||||
It's used to check basic rnn features with cuda.
|
||||
For example, it would throw exception if some components are missing
|
||||
"""
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.nn.functional as F
|
||||
import torch.optim as optim
|
||||
|
||||
|
||||
class SimpleCNN(nn.Module):
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv = nn.Conv2d(1, 1, 3)
|
||||
self.pool = nn.MaxPool2d(2, 2)
|
||||
|
||||
def forward(self, inputs):
|
||||
output = self.pool(F.relu(self.conv(inputs)))
|
||||
output = output.view(1)
|
||||
return output
|
||||
|
||||
|
||||
# Mock one infer
|
||||
device = torch.device("cuda:0")
|
||||
net = SimpleCNN().to(device)
|
||||
net_inputs = torch.rand((1, 1, 5, 5), device=device)
|
||||
outputs = net(net_inputs)
|
||||
print(outputs)
|
||||
|
||||
criterion = nn.MSELoss()
|
||||
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.1)
|
||||
|
||||
# Mock one step training
|
||||
label = torch.full((1,), 1.0, dtype=torch.float, device=device)
|
||||
loss = criterion(outputs, label)
|
||||
loss.backward()
|
||||
optimizer.step()
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user