Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-24 15:44:58 +08:00)

Compare commits: 1 commit on branch python_com

Commit e5937dc68c by desertfire

@@ -1 +1 @@
6.5.0
6.1.1

.buckconfig.oss (Normal file, 23 lines)
@@ -0,0 +1,23 @@
[pt]
  is_oss=1

[buildfile]
  name = BUCK.oss
  includes = //tools/build_defs/select.bzl

[repositories]
  bazel_skylib = third_party/bazel-skylib/
  ovr_config = .

[download]
  in_build = true

[cxx]
  cxxflags = -std=c++17
  ldflags = -Wl,--no-undefined
  should_remap_host_platform = true
  cpp = /usr/bin/clang
  cc = /usr/bin/clang
  cxx = /usr/bin/clang++
  cxxpp = /usr/bin/clang++
  ld = /usr/bin/clang++

@@ -1,19 +0,0 @@
# Aarch64 (ARM/Graviton) Support Scripts
Scripts for building aarch64 PyTorch pip wheels. These scripts build the following wheels:
* torch
* torchvision
* torchaudio
* torchtext
* torchdata
## Aarch64_ci_build.sh
This script is designed to support CD operations within a PyPI manylinux aarch64 container, and is executed inside that container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script assumes that the PyTorch repo is located at ```/pytorch``` and puts the wheels into ```/artifacts```.
### Usage
```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh```
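For example, a concrete invocation (the Python version below is illustrative, not mandated by this README) would be ```DESIRED_PYTHON=3.10 aarch64_ci_build.sh```.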

__NOTE:__ The CI build is currently __EXPERIMENTAL__.

## Build_aarch64_wheel.py
This app allows a person to build using AWS EC2 resources; it requires AWS-CLI and Boto3 with AWS credentials in order to launch EC2 instances for the wheel builds. It can be used in a CodeBuild CD pipeline or from a local system.

### Usage
```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```

@@ -1,39 +0,0 @@
#!/bin/bash
set -eux -o pipefail

GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}

SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
source $SCRIPTPATH/aarch64_ci_setup.sh

tagged_version() {
    GIT_DESCRIBE="git --git-dir /pytorch/.git describe --tags --match v[0-9]*.[0-9]*.[0-9]*"
    if ${GIT_DESCRIBE} --exact >/dev/null; then
        ${GIT_DESCRIBE}
    else
        return 1
    fi
}

if tagged_version >/dev/null; then
    export OVERRIDE_PACKAGE_VERSION="$(tagged_version | sed -e 's/^v//' -e 's/-.*$//')"
fi
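# Worked example (illustrative, not part of the original script): on a
# checkout at a tag such as v2.6.0-rc1, tagged_version prints "v2.6.0-rc1";
# the sed pipeline strips the leading "v" and everything from the first "-",
# so OVERRIDE_PACKAGE_VERSION becomes "2.6.0".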

###############################################################################
# Run aarch64 builder python
###############################################################################
cd /
# add a safe.directory entry for git because of the permissions
# on the mounted pytorch repo
git config --global --add safe.directory /pytorch
pip install -r /pytorch/requirements.txt
pip install auditwheel
if [ "$DESIRED_CUDA" = "cpu" ]; then
    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
    # USE_PRIORITIZED_TEXT_FOR_LD=1 enables the linker-script optimization, see https://github.com/pytorch/pytorch/pull/121975/files
    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
else
    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
    # USE_PRIORITIZED_TEXT_FOR_LD=1 enables the linker-script optimization, see https://github.com/pytorch/pytorch/pull/121975/files
    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
fi

@@ -1,23 +0,0 @@
#!/bin/bash
set -eux -o pipefail

# This script prepares the Docker container for the aarch64_wheel_ci_build.py
# python script by creating symlinks from the desired /opt/python to /usr/local/bin/

NUMPY_VERSION=2.0.2
PYGIT2_VERSION=1.15.1
if [[ "$DESIRED_PYTHON" == "3.13" ]]; then
    NUMPY_VERSION=2.1.2
    PYGIT2_VERSION=1.16.0
fi

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
source $SCRIPTPATH/../manywheel/set_desired_python.sh

pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2 pygit2==${PYGIT2_VERSION}

for tool in python python3 pip pip3 ninja scons patchelf; do
    ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;
done

python --version

@@ -1,230 +0,0 @@
#!/usr/bin/env python3
# encoding: UTF-8

import os
import shutil
from subprocess import check_call, check_output
from typing import List

from pygit2 import Repository


def list_dir(path: str) -> List[str]:
    """
    Helper for listing the contents of a directory
    """
    return check_output(["ls", "-1", path]).decode().split("\n")


def build_ArmComputeLibrary() -> None:
    """
    Build Arm Compute Library for aarch64 PyTorch
    """
    print("Building Arm Compute Library")
    acl_build_flags = [
        "debug=0",
        "neon=1",
        "opencl=0",
        "os=linux",
        "openmp=1",
        "cppthreads=0",
        "arch=armv8a",
        "multi_isa=1",
        "fixed_format_kernels=1",
        "build=native",
    ]
    acl_install_dir = "/acl"
    acl_checkout_dir = "ComputeLibrary"
    os.makedirs(acl_install_dir)
    check_call(
        [
            "git",
            "clone",
            "https://github.com/ARM-software/ComputeLibrary.git",
            "-b",
            "v24.09",
            "--depth",
            "1",
            "--shallow-submodules",
        ]
    )

    check_call(
        ["scons", "Werror=1", "-j8", f"build_dir=/{acl_install_dir}/build"]
        + acl_build_flags,
        cwd=acl_checkout_dir,
    )
    for d in ["arm_compute", "include", "utils", "support", "src"]:
        shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")


def update_wheel(wheel_path) -> None:
    """
    Update the cuda wheel libraries
    """
    folder = os.path.dirname(wheel_path)
    wheelname = os.path.basename(wheel_path)
    os.mkdir(f"{folder}/tmp")
    os.system(f"unzip {wheel_path} -d {folder}/tmp")
    libs_to_copy = [
        "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
        "/usr/local/cuda/lib64/libcudnn.so.9",
        "/usr/local/cuda/lib64/libcublas.so.12",
        "/usr/local/cuda/lib64/libcublasLt.so.12",
        "/usr/local/cuda/lib64/libcudart.so.12",
        "/usr/local/cuda/lib64/libcufft.so.11",
        "/usr/local/cuda/lib64/libcusparse.so.12",
        "/usr/local/cuda/lib64/libcusparseLt.so.0",
        "/usr/local/cuda/lib64/libcusolver.so.11",
        "/usr/local/cuda/lib64/libcurand.so.10",
        "/usr/local/cuda/lib64/libnvToolsExt.so.1",
        "/usr/local/cuda/lib64/libnvJitLink.so.12",
        "/usr/local/cuda/lib64/libnvrtc.so.12",
        "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.4",
        "/usr/local/cuda/lib64/libcudnn_adv.so.9",
        "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
        "/usr/local/cuda/lib64/libcudnn_graph.so.9",
        "/usr/local/cuda/lib64/libcudnn_ops.so.9",
        "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
        "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
        "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
        "/lib64/libgomp.so.1",
        "/usr/lib64/libgfortran.so.5",
        "/acl/build/libarm_compute.so",
        "/acl/build/libarm_compute_graph.so",
    ]
    if enable_cuda:
        libs_to_copy += [
            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
            "/usr/local/lib/libnvpl_lapack_core.so.0",
            "/usr/local/lib/libnvpl_blas_core.so.0",
        ]
    else:
        libs_to_copy += [
            "/opt/OpenBLAS/lib/libopenblas.so.0",
        ]
    # Copy libraries to unzipped_folder/torch/lib
    for lib_path in libs_to_copy:
        lib_name = os.path.basename(lib_path)
        shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
        os.system(
            f"cd {folder}/tmp/torch/lib/; "
            f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}"
        )
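        # Illustrative note (not in the original file): '--set-rpath $ORIGIN'
        # makes each copied library resolve its dependencies from the same
        # torch/lib directory at runtime, so the repacked wheel is self-contained.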
    os.mkdir(f"{folder}/cuda_wheel")
    os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
    shutil.move(
        f"{folder}/cuda_wheel/{wheelname}",
        f"{folder}/{wheelname}",
        copy_function=shutil.copy2,
    )
    os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/")


def complete_wheel(folder: str) -> str:
    """
    Complete wheel build and put in artifact location
    """
    wheel_name = list_dir(f"/{folder}/dist")[0]

    if "pytorch" in folder and not enable_cuda:
        print("Repairing Wheel with AuditWheel")
        check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
        repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0]

        print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist")
        os.rename(
            f"/{folder}/wheelhouse/{repaired_wheel_name}",
            f"/{folder}/dist/{repaired_wheel_name}",
        )
    else:
        repaired_wheel_name = wheel_name

    print(f"Copying {repaired_wheel_name} to artifacts")
    shutil.copy2(
        f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}"
    )

    return repaired_wheel_name


def parse_arguments():
    """
    Parse inline arguments
    """
    from argparse import ArgumentParser

    parser = ArgumentParser("AARCH64 wheels python CD")
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--build-only", action="store_true")
    parser.add_argument("--test-only", type=str)
    parser.add_argument("--enable-mkldnn", action="store_true")
    parser.add_argument("--enable-cuda", action="store_true")
    return parser.parse_args()


if __name__ == "__main__":
    """
    Entry Point
    """
    args = parse_arguments()
    enable_mkldnn = args.enable_mkldnn
    enable_cuda = args.enable_cuda
    repo = Repository("/pytorch")
    branch = repo.head.name
    if branch == "HEAD":
        branch = "master"

    print("Building PyTorch wheel")
    build_vars = "MAX_JOBS=5 CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
    os.system("cd /pytorch; python setup.py clean")

    override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
    if override_package_version is not None:
        version = override_package_version
        build_vars += (
            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
        )
    elif branch in ["nightly", "master"]:
        build_date = (
            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
            .decode()
            .replace("-", "")
        )
        version = (
            check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
        )
        if enable_cuda:
            desired_cuda = os.getenv("DESIRED_CUDA")
            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 "
        else:
            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
    elif branch.startswith(("v1.", "v2.")):
        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1:branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "

    if enable_mkldnn:
        build_ArmComputeLibrary()
        print("build pytorch with mkldnn+acl backend")
        build_vars += (
            "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
            "ACL_ROOT_DIR=/acl "
            "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
            "ACL_INCLUDE_DIR=/acl/build "
            "ACL_LIBRARY=/acl/build "
        )
        if enable_cuda:
            build_vars += "BLAS=NVPL "
        else:
            build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS "
    else:
        print("build pytorch without mkldnn backend")

    os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
    if enable_cuda:
        print("Updating Cuda Dependency")
        filename = os.listdir("/pytorch/dist/")
        wheel_path = f"/pytorch/dist/{filename[0]}"
        update_wheel(wheel_path)
    pytorch_wheel_name = complete_wheel("/pytorch/")
    print(f"Build Complete. Created {pytorch_wheel_name}..")

File diff suppressed because it is too large

@@ -1,87 +0,0 @@
#!/usr/bin/env python3

import os
import shutil
import sys
from subprocess import check_call
from tempfile import TemporaryDirectory

from auditwheel.elfutils import elf_file_filter
from auditwheel.lddtree import lddtree
from auditwheel.patcher import Patchelf
from auditwheel.repair import copylib
from auditwheel.wheeltools import InWheelCtx


def replace_tag(filename):
    with open(filename) as f:
        lines = f.read().split("\n")
    for i, line in enumerate(lines):
        if not line.startswith("Tag: "):
            continue
        lines[i] = line.replace("-linux_", "-manylinux2014_")
        print(f"Updated tag from {line} to {lines[i]}")

    with open(filename, "w") as f:
        f.write("\n".join(lines))


class AlignedPatchelf(Patchelf):
    def set_soname(self, file_name: str, new_soname: str) -> None:
        check_call(
            ["patchelf", "--page-size", "65536", "--set-soname", new_soname, file_name]
        )

    def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None:
        check_call(
            [
                "patchelf",
                "--page-size",
                "65536",
                "--replace-needed",
                soname,
                new_soname,
                file_name,
            ]
        )


def embed_library(whl_path, lib_soname, update_tag=False):
    patcher = AlignedPatchelf()
    out_dir = TemporaryDirectory()
    whl_name = os.path.basename(whl_path)
    tmp_whl_name = os.path.join(out_dir.name, whl_name)
    with InWheelCtx(whl_path) as ctx:
        torchlib_path = os.path.join(ctx._tmpdir.name, "torch", "lib")
        ctx.out_wheel = tmp_whl_name
        new_lib_path, new_lib_soname = None, None
        for filename, _ in elf_file_filter(ctx.iter_files()):
            if not filename.startswith("torch/lib"):
                continue
            libtree = lddtree(filename)
            if lib_soname not in libtree["needed"]:
                continue
            lib_path = libtree["libs"][lib_soname]["path"]
            if lib_path is None:
                print(f"Can't embed {lib_soname} as it could not be found")
                break
            if lib_path.startswith(torchlib_path):
                continue

            if new_lib_path is None:
                new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher)
            patcher.replace_needed(filename, lib_soname, new_lib_soname)
            print(f"Replacing {lib_soname} with {new_lib_soname} for {filename}")
        if update_tag:
            # Add manylinux2014 tag
            for filename in ctx.iter_files():
                if os.path.basename(filename) != "WHEEL":
                    continue
                replace_tag(filename)
    shutil.move(tmp_whl_name, whl_path)


if __name__ == "__main__":
    embed_library(
        sys.argv[1], "libgomp.so.1", len(sys.argv) > 2 and sys.argv[2] == "--update-tag"
    )
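# Invocation sketch (illustrative; the wheel filename is a made-up example):
#   python embed_library.py torch-2.0.0-cp310-cp310-linux_aarch64.whl --update-tag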

.ci/docker/android/AndroidManifest.xml (Normal file, 1 line)
@@ -0,0 +1 @@
<manifest package="org.pytorch.deps" />

.ci/docker/android/build.gradle (Normal file, 66 lines)
@@ -0,0 +1,66 @@
buildscript {
    ext {
        minSdkVersion = 21
        targetSdkVersion = 28
        compileSdkVersion = 28
        buildToolsVersion = '28.0.3'

        coreVersion = "1.2.0"
        extJUnitVersion = "1.1.1"
        runnerVersion = "1.2.0"
        rulesVersion = "1.2.0"
        junitVersion = "4.12"
    }

    repositories {
        google()
        mavenLocal()
        mavenCentral()
        jcenter()
    }

    dependencies {
        classpath 'com.android.tools.build:gradle:4.1.2'
        classpath 'com.vanniktech:gradle-maven-publish-plugin:0.14.2'
    }
}

repositories {
    google()
    jcenter()
}

apply plugin: 'com.android.library'

android {
    compileSdkVersion rootProject.compileSdkVersion
    buildToolsVersion rootProject.buildToolsVersion

    defaultConfig {
        minSdkVersion minSdkVersion
        targetSdkVersion targetSdkVersion
    }

    sourceSets {
        main {
            manifest.srcFile 'AndroidManifest.xml'
        }
    }
}

dependencies {
    implementation 'com.android.support:appcompat-v7:28.0.0'
    implementation 'androidx.appcompat:appcompat:1.0.0'
    implementation 'com.facebook.fbjni:fbjni-java-only:0.2.2'
    implementation 'com.google.code.findbugs:jsr305:3.0.1'
    implementation 'com.facebook.soloader:nativeloader:0.10.5'

    implementation 'junit:junit:' + rootProject.junitVersion
    implementation 'androidx.test:core:' + rootProject.coreVersion

    implementation 'junit:junit:' + rootProject.junitVersion
    implementation 'androidx.test:core:' + rootProject.coreVersion
    implementation 'androidx.test.ext:junit:' + rootProject.extJUnitVersion
    implementation 'androidx.test:rules:' + rootProject.rulesVersion
    implementation 'androidx.test:runner:' + rootProject.runnerVersion
}

@@ -244,6 +244,16 @@ case "$image" in
      CONDA_CMAKE=yes
      ONNX=yes
      ;;
    pytorch-linux-focal-py3-clang9-android-ndk-r21e)
      ANACONDA_PYTHON_VERSION=3.9
      CLANG_VERSION=9
      LLVMDEV=yes
      PROTOBUF=yes
      ANDROID=yes
      ANDROID_NDK_VERSION=r21e
      GRADLE_VERSION=6.8.3
      NINJA_VERSION=1.9.0
      ;;
    pytorch-linux-focal-py3.9-clang10)
      ANACONDA_PYTHON_VERSION=3.9
      CLANG_VERSION=10
@@ -292,7 +302,7 @@ case "$image" in
      PROTOBUF=yes
      DB=yes
      VISION=yes
      ROCM_VERSION=6.2.4
      ROCM_VERSION=6.2
      NINJA_VERSION=1.9.0
      CONDA_CMAKE=yes
      TRITON=yes
@@ -308,17 +318,6 @@ case "$image" in
      CONDA_CMAKE=yes
      TRITON=yes
      ;;
    pytorch-linux-jammy-xpu-2025.0-py3)
      ANACONDA_PYTHON_VERSION=3.9
      GCC_VERSION=11
      PROTOBUF=yes
      DB=yes
      VISION=yes
      XPU_VERSION=2025.0
      NINJA_VERSION=1.9.0
      CONDA_CMAKE=yes
      TRITON=yes
      ;;
    pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
      ANACONDA_PYTHON_VERSION=3.9
      GCC_VERSION=11
@@ -415,6 +414,9 @@ case "$image" in
      DB=yes
      VISION=yes
      CONDA_CMAKE=yes
      # snadampal: skipping sccache due to the following issue
      # https://github.com/pytorch/pytorch/issues/121559
      SKIP_SCCACHE_INSTALL=yes
      # snadampal: skipping llvm src build install because the current version
      # from pytorch/llvm:9.0.1 is x86 specific
      SKIP_LLVM_SRC_BUILD_INSTALL=yes
@@ -427,6 +429,9 @@ case "$image" in
      DB=yes
      VISION=yes
      CONDA_CMAKE=yes
      # snadampal: skipping sccache due to the following issue
      # https://github.com/pytorch/pytorch/issues/121559
      SKIP_SCCACHE_INSTALL=yes
      # snadampal: skipping llvm src build install because the current version
      # from pytorch/llvm:9.0.1 is x86 specific
      SKIP_LLVM_SRC_BUILD_INSTALL=yes
@@ -503,6 +508,8 @@ docker build \
  --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
  --build-arg "CUDNN_VERSION=${CUDNN_VERSION}" \
  --build-arg "TENSORRT_VERSION=${TENSORRT_VERSION}" \
  --build-arg "ANDROID=${ANDROID}" \
  --build-arg "ANDROID_NDK=${ANDROID_NDK_VERSION}" \
  --build-arg "GRADLE_VERSION=${GRADLE_VERSION}" \
  --build-arg "VULKAN_SDK_VERSION=${VULKAN_SDK_VERSION}" \
  --build-arg "SWIFTSHADER=${SWIFTSHADER}" \
@@ -510,7 +517,7 @@ docker build \
  --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
  --build-arg "KATEX=${KATEX:-}" \
  --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
  --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a}" \
  --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx906;gfx90a}" \
  --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
  --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
  --build-arg "UCC_COMMIT=${UCC_COMMIT}" \

@@ -1 +1 @@
6f638937d64e3396793956d75ee3e14802022745
cd1c833b079adb324871dcbbe75b43d42ffc0ade

@@ -1 +1 @@
c7711371cace304afe265c1ffa906415ab82fc66
6a333f1b05671f6fada4ba7bbfae4a02a9d96f4f

@@ -1 +1 @@
e98b6fcb8df5b44eb0d0addb6767c573d37ba024
91b14bf5593cf58a8541f3e6b9125600a867d4ef

@@ -1 +1 @@
35c6c7c6284582b3f41c71c150e11b517acf074a
cf34004b8a67d290a962da166f5aa2fc66751326

.ci/docker/common/install_android.sh (Executable file, 112 lines)
@@ -0,0 +1,112 @@
#!/bin/bash

set -ex

[ -n "${ANDROID_NDK}" ]

_https_amazon_aws=https://ossci-android.s3.amazonaws.com

apt-get update
apt-get install -y --no-install-recommends autotools-dev autoconf unzip
apt-get autoclean && apt-get clean
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*

pushd /tmp
curl -Os --retry 3 $_https_amazon_aws/android-ndk-${ANDROID_NDK}-linux-x86_64.zip
popd
_ndk_dir=/opt/ndk
mkdir -p "$_ndk_dir"
unzip -qo /tmp/android*.zip -d "$_ndk_dir"
_versioned_dir=$(find "$_ndk_dir/" -mindepth 1 -maxdepth 1 -type d)
mv "$_versioned_dir"/* "$_ndk_dir"/
rmdir "$_versioned_dir"
rm -rf /tmp/*

# Install OpenJDK
# https://hub.docker.com/r/picoded/ubuntu-openjdk-8-jdk/dockerfile/

sudo apt-get update && \
    apt-get install -y openjdk-8-jdk && \
    apt-get install -y ant && \
    apt-get clean && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /var/cache/oracle-jdk8-installer;

# Fix certificate issues, found as of
# https://bugs.launchpad.net/ubuntu/+source/ca-certificates-java/+bug/983302

sudo apt-get update && \
    apt-get install -y ca-certificates-java && \
    apt-get clean && \
    update-ca-certificates -f && \
    rm -rf /var/lib/apt/lists/* && \
    rm -rf /var/cache/oracle-jdk8-installer;

export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64/

# Installing android sdk
# https://github.com/circleci/circleci-images/blob/staging/android/Dockerfile.m4

_tmp_sdk_zip=/tmp/android-sdk-linux.zip
_android_home=/opt/android/sdk

rm -rf $_android_home
sudo mkdir -p $_android_home
curl --silent --show-error --location --fail --retry 3 --output /tmp/android-sdk-linux.zip $_https_amazon_aws/android-sdk-linux-tools3859397-build-tools2803-2902-platforms28-29.zip
sudo unzip -q $_tmp_sdk_zip -d $_android_home
rm $_tmp_sdk_zip

sudo chmod -R 777 $_android_home

export ANDROID_HOME=$_android_home
export ADB_INSTALL_TIMEOUT=120

export PATH="${ANDROID_HOME}/tools:${ANDROID_HOME}/tools/bin:${ANDROID_HOME}/platform-tools:${PATH}"
echo "PATH:${PATH}"

# Installing Gradle
echo "GRADLE_VERSION:${GRADLE_VERSION}"
_gradle_home=/opt/gradle
sudo rm -rf $_gradle_home
sudo mkdir -p $_gradle_home

curl --silent --output /tmp/gradle.zip --retry 3 $_https_amazon_aws/gradle-${GRADLE_VERSION}-bin.zip

sudo unzip -q /tmp/gradle.zip -d $_gradle_home
rm /tmp/gradle.zip

sudo chmod -R 777 $_gradle_home

export GRADLE_HOME=$_gradle_home/gradle-$GRADLE_VERSION
alias gradle="${GRADLE_HOME}/bin/gradle"

export PATH="${GRADLE_HOME}/bin/:${PATH}"
echo "PATH:${PATH}"

gradle --version

mkdir /var/lib/jenkins/gradledeps
cp build.gradle /var/lib/jenkins/gradledeps
cp AndroidManifest.xml /var/lib/jenkins/gradledeps

pushd /var/lib/jenkins

export GRADLE_LOCAL_PROPERTIES=gradledeps/local.properties
rm -f $GRADLE_LOCAL_PROPERTIES
echo "sdk.dir=/opt/android/sdk" >> $GRADLE_LOCAL_PROPERTIES
echo "ndk.dir=/opt/ndk" >> $GRADLE_LOCAL_PROPERTIES

chown -R jenkins /var/lib/jenkins/gradledeps
chgrp -R jenkins /var/lib/jenkins/gradledeps

sudo -H -u jenkins $GRADLE_HOME/bin/gradle -Pandroid.useAndroidX=true -p /var/lib/jenkins/gradledeps -g /var/lib/jenkins/.gradle --refresh-dependencies --debug --stacktrace assemble

chown -R jenkins /var/lib/jenkins/.gradle
chgrp -R jenkins /var/lib/jenkins/.gradle

popd

rm -rf /var/lib/jenkins/.gradle/daemon

# Cache vision models used by the test
source "$(dirname "${BASH_SOURCE[0]}")/cache_vision_models.sh"

@@ -9,12 +9,7 @@ install_ubuntu() {
    # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh`
    apt-get install -y cargo
    echo "Checking out sccache repo"
    if [ -n "$CUDA_VERSION" ]; then
        # TODO: Remove this
        git clone https://github.com/pytorch/sccache
    else
        git clone https://github.com/mozilla/sccache -b v0.8.2
    fi
    git clone https://github.com/pytorch/sccache
    cd sccache
    echo "Building sccache"
    cargo build --release
@@ -24,10 +19,6 @@ install_ubuntu() {
    rm -rf sccache
    apt-get remove -y cargo rustc
    apt-get autoclean && apt-get clean

    echo "Downloading old sccache binary from S3 repo for PCH builds"
    curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a
    chmod 755 /opt/cache/bin/sccache-0.2.14a
}

install_binary() {
@@ -45,46 +36,18 @@ if [ -n "$ROCM_VERSION" ]; then
    curl --retry 3 http://repo.radeon.com/misc/.sccache_amd/sccache -o /opt/cache/bin/sccache
else
    ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
    if [ -n "$CUDA_VERSION" ]; then
        # TODO: Install the pre-built binary from S3 as building from source
        # https://github.com/pytorch/sccache has started failing mysteriously
        # in which sccache server couldn't start with the following error:
        # sccache: error: Invalid argument (os error 22)
        install_binary
    else
        install_ubuntu
    fi
    # TODO: Install the pre-built binary from S3 as building from source
    # https://github.com/pytorch/sccache has started failing mysteriously
    # in which sccache server couldn't start with the following error:
    # sccache: error: Invalid argument (os error 22)
    install_binary
fi
chmod a+x /opt/cache/bin/sccache

function write_sccache_stub() {
    # Unset LD_PRELOAD for ps because of asan + ps issues
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
    if [ $1 == "gcc" ]; then
        # Do not call sccache recursively when dumping preprocessor argument
        # For some reason it's very important for the first cached nvcc invocation
        cat > "/opt/cache/bin/$1" <<EOF
#!/bin/sh

if [ "\$1" = "-E" ] || [ "\$2" = "-E" ]; then
  exec $(which $1) "\$@"
elif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which $1) "\$@"
else
  exec $(which $1) "\$@"
fi
EOF
    else
        cat > "/opt/cache/bin/$1" <<EOF
#!/bin/sh

if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
  exec sccache $(which $1) "\$@"
else
  exec $(which $1) "\$@"
fi
EOF
    fi
    printf "#!/bin/sh\nif [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then\n  exec sccache $(which $1) \"\$@\"\nelse\n  exec $(which $1) \"\$@\"\nfi" > "/opt/cache/bin/$1"
    chmod a+x "/opt/cache/bin/$1"
}
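# Usage sketch (illustrative, not from the diff): `write_sccache_stub gcc`
# generates an /opt/cache/bin/gcc wrapper that routes compilations through
# sccache unless the parent process is already sccache.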

@@ -20,10 +20,9 @@ if [ -n "$CLANG_VERSION" ]; then
fi

sudo apt-get update
if [[ $CLANG_VERSION -ge 18 ]]; then
    apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
else
    apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
if [[ $CLANG_VERSION == 18 ]]; then
    apt-get install -y --no-install-recommends libomp-18-dev
fi

# Install dev version of LLVM.

@@ -65,10 +65,23 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

    # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
    if [[ $(uname -m) == "aarch64" ]]; then
        conda_install "openblas==0.3.25=*openmp*"
        CONDA_COMMON_DEPS="astunparse pyyaml setuptools openblas==0.3.25=*openmp* ninja==1.11.1 scons==4.5.2"

        if [ "$ANACONDA_PYTHON_VERSION" = "3.8" ]; then
            NUMPY_VERSION=1.24.4
        else
            NUMPY_VERSION=1.26.2
        fi
    else
        conda_install "mkl=2021.4.0 mkl-include=2021.4.0"
        CONDA_COMMON_DEPS="astunparse pyyaml mkl=2021.4.0 mkl-include=2021.4.0 setuptools"

        if [ "$ANACONDA_PYTHON_VERSION" = "3.11" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.12" ] || [ "$ANACONDA_PYTHON_VERSION" = "3.13" ]; then
            NUMPY_VERSION=1.26.0
        else
            NUMPY_VERSION=1.21.2
        fi
    fi
    conda_install ${CONDA_COMMON_DEPS}

    # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
    # and libpython-static for torch deploy
@@ -90,6 +103,8 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then

    # Install some other packages, including those needed for Python test reporting
    pip_install -r /opt/conda/requirements-ci.txt
    pip_install numpy=="$NUMPY_VERSION"
    pip_install -U scikit-learn

    if [ -n "$DOCS" ]; then
        apt-get update

@@ -3,7 +3,7 @@
set -ex

NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.5.1.17
CUDNN_VERSION=9.1.0.70

function install_cusparselt_040 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
@@ -38,19 +38,7 @@ function install_cusparselt_062 {
    rm -rf tmp_cusparselt
}

function install_cusparselt_063 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    mkdir tmp_cusparselt && pushd tmp_cusparselt
    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
    tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
    cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
    cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf tmp_cusparselt
}

function install_118 {
    CUDNN_VERSION=9.1.0.70
    echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.4.0"
    rm -rf /usr/local/cuda-11.8 /usr/local/cuda
    # install CUDA 11.8.0 in the same container
@@ -117,7 +105,6 @@ function install_121 {
}

function install_124 {
    CUDNN_VERSION=9.1.0.70
    echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
    rm -rf /usr/local/cuda-12.4 /usr/local/cuda
    # install CUDA 12.4.1 in the same container
@@ -150,39 +137,6 @@ function install_124 {
    ldconfig
}

function install_126 {
    echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
    rm -rf /usr/local/cuda-12.6 /usr/local/cuda
    # install CUDA 12.6.3 in the same container
    wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux.run
    chmod +x cuda_12.6.3_560.35.05_linux.run
    ./cuda_12.6.3_560.35.05_linux.run --toolkit --silent
    rm -f cuda_12.6.3_560.35.05_linux.run
    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
    tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
    cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_063

    ldconfig
}

function prune_118 {
    echo "Pruning CUDA 11.8 and cuDNN"
    #####################################################################################
@@ -273,46 +227,12 @@ function prune_124 {
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

    #####################################################################################
    # CUDA 12.4 prune visual tools
    # CUDA 12.1 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.4/"
    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

function prune_126 {
    echo "Pruning CUDA 12.6"
    #####################################################################################
    # CUDA 12.6 prune static libs
    #####################################################################################
    export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"

    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

    if [[ -n "$OVERRIDE_GENCODE" ]]; then
        export GENCODE=$OVERRIDE_GENCODE
    fi
    if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
    fi

    # all CUDA libs except CuDNN and CuBLAS
    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
        | xargs -I {} bash -c \
        "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

    # prune CuDNN and CuBLAS
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

    #####################################################################################
    # CUDA 12.6 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.6/"
    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
@@ -323,8 +243,6 @@ do
        ;;
    12.4) install_124; prune_124
        ;;
    12.6) install_126; prune_126
        ;;
    *) echo "bad argument $1"; exit 1
        ;;
    esac

@@ -4,7 +4,6 @@
set -ex

NCCL_VERSION=v2.21.5-1
CUDNN_VERSION=9.5.1.17

function install_cusparselt_062 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
@@ -17,20 +16,8 @@ function install_cusparselt_062 {
    rm -rf tmp_cusparselt
}

function install_cusparselt_063 {
    # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
    mkdir tmp_cusparselt && pushd tmp_cusparselt
    wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
    tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
    cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
    cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
    popd
    rm -rf tmp_cusparselt
}

function install_124 {
    CUDNN_VERSION=9.1.0.70
    echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
    echo "Installing CUDA 12.4.1 and cuDNN 9.1 and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
    rm -rf /usr/local/cuda-12.4 /usr/local/cuda
    # install CUDA 12.4.1 in the same container
    wget -q https://developer.download.nvidia.com/compute/cuda/12.4.1/local_installers/cuda_12.4.1_550.54.15_linux_sbsa.run
@@ -41,10 +28,10 @@ function install_124 {

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
    tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
    cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
    cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz -O cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
    tar xf cudnn-linux-sbsa-9.1.0.70_cuda12-archive.tar.xz
    cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/include/* /usr/local/cuda/include/
    cp -a cudnn-linux-sbsa-9.1.0.70_cuda12-archive/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

@@ -87,87 +74,18 @@ function prune_124 {
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

    #####################################################################################
    # CUDA 12.4 prune visual tools
    # CUDA 12.1 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.4/"
    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
}

function install_126 {
    echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.3"
    rm -rf /usr/local/cuda-12.6 /usr/local/cuda
    # install CUDA 12.6.3 in the same container
    wget -q https://developer.download.nvidia.com/compute/cuda/12.6.3/local_installers/cuda_12.6.3_560.35.05_linux_sbsa.run
    chmod +x cuda_12.6.3_560.35.05_linux_sbsa.run
    ./cuda_12.6.3_560.35.05_linux_sbsa.run --toolkit --silent
    rm -f cuda_12.6.3_560.35.05_linux_sbsa.run
    rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.6 /usr/local/cuda

    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn && cd tmp_cudnn
    wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-sbsa/cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
    tar xf cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive.tar.xz
    cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
    cp -a cudnn-linux-sbsa-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf tmp_cudnn

    # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
    # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
    git clone -b ${NCCL_VERSION} --depth 1 https://github.com/NVIDIA/nccl.git
    cd nccl && make -j src.build
    cp -a build/include/* /usr/local/cuda/include/
    cp -a build/lib/* /usr/local/cuda/lib64/
    cd ..
    rm -rf nccl

    install_cusparselt_063

    ldconfig
}

function prune_126 {
    echo "Pruning CUDA 12.6"
    #####################################################################################
    # CUDA 12.6 prune static libs
    #####################################################################################
    export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
    export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"

    export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
    export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"

    if [[ -n "$OVERRIDE_GENCODE" ]]; then
        export GENCODE=$OVERRIDE_GENCODE
    fi
    if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
        export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
    fi

    # all CUDA libs except CuDNN and CuBLAS
    ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
        | xargs -I {} bash -c \
        "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"

    # prune CuDNN and CuBLAS
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
    $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a

    #####################################################################################
    # CUDA 12.6 prune visual tools
    #####################################################################################
    export CUDA_BASE="/usr/local/cuda-12.6/"
    rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
}

# idiomatic parameter and option handling in sh
while test $# -gt 0
do
    case "$1" in
    12.4) install_124; prune_124
        ;;
    12.6) install_126; prune_126
        ;;
    *) echo "bad argument $1"; exit 1
        ;;
    esac

@@ -4,9 +4,7 @@ if [[ -n "${CUDNN_VERSION}" ]]; then
    # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
    mkdir tmp_cudnn
    pushd tmp_cudnn
    if [[ ${CUDA_VERSION:0:4} == "12.6" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-9.5.1.17_cuda12-archive"
    elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
    if [[ ${CUDA_VERSION:0:2} == "12" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda12-archive"
    elif [[ ${CUDA_VERSION:0:2} == "11" ]]; then
        CUDNN_NAME="cudnn-linux-x86_64-9.1.0.70_cuda11-archive"

@@ -36,8 +36,12 @@ install_conda_dependencies() {
}

install_pip_dependencies() {
    pushd executorch
    as_jenkins bash install_requirements.sh --pybind xnnpack
    pushd executorch/.ci/docker
    # Install PyTorch CPU build beforehand to avoid installing the much bigger CUDA
    # binaries later, ExecuTorch only needs CPU
    pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
    # Install all Python dependencies
    pip_install -r requirements-ci.txt
    popd
}
@@ -50,7 +54,7 @@ setup_executorch() {
    export EXECUTORCH_BUILD_PYBIND=ON
    export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"

    as_jenkins .ci/scripts/setup-linux.sh cmake || true
    as_jenkins .ci/scripts/setup-linux.sh cmake
    popd
}

@@ -16,7 +16,7 @@ case "$ID" in
    ubuntu)
        IS_UBUNTU=1
        ;;
    centos|almalinux)
    centos)
        IS_UBUNTU=0
        ;;
    *)

@@ -43,6 +43,12 @@ else
fi
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
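# Illustrative example: for ROCM_VERSION=6.2.4 this yields 6*10000 + 2*100 + 4 = 60204.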

# Install custom MIOpen + COMgr for ROCm >= 4.0.1
if [[ $ROCM_INT -lt 40001 ]]; then
    echo "ROCm version < 4.0.1; will not install custom MIOpen"
    exit 0
fi

# Function to retry functions that sometimes timeout or have flaky failures
retry () {
    $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
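    # Illustrative usage (not in the original hunk): `retry wget -q "$url"`
    # re-runs the command after 1s, 2s, 4s, and 8s on failure.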

@@ -60,27 +66,55 @@ else
    ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
fi

# MIOPEN_USE_HIP_KERNELS is a Workaround for COMgr issues
MIOPEN_CMAKE_COMMON_FLAGS="
-DMIOPEN_USE_COMGR=ON
-DMIOPEN_BUILD_DRIVER=OFF
"
if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
else
    echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source"
# Pull MIOpen repo and set DMIOPEN_EMBED_DB based on ROCm version
if [[ $ROCM_INT -ge 60300 ]]; then
    echo "ROCm 6.3+ MIOpen does not need any patches, do not build from source"
    exit 0
elif [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60300 ]]; then
    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
elif [[ $ROCM_INT -ge 60100 ]] && [[ $ROCM_INT -lt 60200 ]]; then
    echo "ROCm 6.1 MIOpen does not need any patches, do not build from source"
    exit 0
elif [[ $ROCM_INT -ge 60000 ]] && [[ $ROCM_INT -lt 60100 ]]; then
    echo "ROCm 6.0 MIOpen does not need any patches, do not build from source"
    exit 0
elif [[ $ROCM_INT -ge 50700 ]] && [[ $ROCM_INT -lt 60000 ]]; then
    echo "ROCm 5.7 MIOpen does not need any patches, do not build from source"
    exit 0
elif [[ $ROCM_INT -ge 50600 ]] && [[ $ROCM_INT -lt 50700 ]]; then
    MIOPEN_BRANCH="release/rocm-rel-5.6-staging"
elif [[ $ROCM_INT -ge 50500 ]] && [[ $ROCM_INT -lt 50600 ]]; then
    MIOPEN_BRANCH="release/rocm-rel-5.5-gfx11"
elif [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
    MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
    MIOPEN_BRANCH="release/rocm-rel-5.4-staging"
elif [[ $ROCM_INT -ge 50300 ]] && [[ $ROCM_INT -lt 50400 ]]; then
    MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
    MIOPEN_BRANCH="release/rocm-rel-5.3-staging"
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50300 ]]; then
    MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36 -DMIOPEN_USE_MLIR=Off"
    MIOPEN_BRANCH="release/rocm-rel-5.2-staging"
elif [[ $ROCM_INT -ge 50100 ]] && [[ $ROCM_INT -lt 50200 ]]; then
    MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
    MIOPEN_BRANCH="release/rocm-rel-5.1-staging"
elif [[ $ROCM_INT -ge 50000 ]] && [[ $ROCM_INT -lt 50100 ]]; then
    MIOPEN_CMAKE_DB_FLAGS="-DMIOPEN_EMBED_DB=gfx900_56;gfx906_60;gfx90878;gfx90a6e;gfx1030_36"
    MIOPEN_BRANCH="release/rocm-rel-5.0-staging"
else
    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
    exit 1
fi


if [[ ${IS_UBUNTU} == 1 ]]; then
    apt-get remove -y miopen-hip
else
    # Workaround since almalinux manylinux image already has this and cget doesn't like that
    rm -rf /usr/local/lib/pkgconfig/sqlite3.pc

    # Versioned package name needs regex match
    # Use --noautoremove to prevent other rocm packages from being uninstalled
    yum remove -y miopen-hip* --noautoremove
    yum remove -y miopen-hip
fi

git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
@@ -88,7 +122,16 @@ pushd MIOpen
# remove .git to save disk space since CI runner was running out
rm -rf .git
# Don't build CK to save docker build time
sed -i '/composable_kernel/d' requirements.txt
if [[ $ROCM_INT -ge 60200 ]]; then
    sed -i '/composable_kernel/d' requirements.txt
fi
# Don't build MLIR to save docker build time
# since we are disabling MLIR backend for MIOpen anyway
if [[ $ROCM_INT -ge 50400 ]] && [[ $ROCM_INT -lt 50500 ]]; then
    sed -i '/rocMLIR/d' requirements.txt
elif [[ $ROCM_INT -ge 50200 ]] && [[ $ROCM_INT -lt 50400 ]]; then
    sed -i '/llvm-project-mlir/d' requirements.txt
fi
## MIOpen minimum requirements
cmake -P install_deps.cmake --minimum

@@ -110,7 +153,7 @@ cd build
PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
    ${MIOPEN_CMAKE_COMMON_FLAGS} \
    ${MIOPEN_CMAKE_DB_FLAGS} \
    -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}"
    -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}/hip;${ROCM_INSTALL_PATH}"
make MIOpen -j $(nproc)

# Build MIOpen package

@@ -32,7 +32,7 @@ pip_install coloredlogs packaging

pip_install onnxruntime==1.18.1
pip_install onnx==1.16.2
pip_install onnxscript==0.1.0.dev20241124 --no-deps
pip_install onnxscript==0.1.0.dev20240831 --no-deps
# required by onnxscript
pip_install ml_dtypes

@@ -12,7 +12,7 @@ case "$ID" in
        apt-get install -y libpciaccess-dev pkg-config
        apt-get clean
        ;;
    centos|almalinux)
    centos)
        yum install -y libpciaccess-devel pkgconfig
        ;;
    *)

@@ -3,18 +3,6 @@

set -ex

# Magma build scripts need `python`
ln -sf /usr/bin/python3 /usr/bin/python

ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
    almalinux)
        yum install -y gcc-gfortran
        ;;
    *)
        echo "No preinstalls to build magma..."
        ;;
esac

MKLROOT=${MKLROOT:-/opt/conda/envs/py_$ANACONDA_PYTHON_VERSION}

@@ -2,13 +2,6 @@

set -ex

# Since version 24 the system ships with user 'ubuntu' that has id 1000
# We need a work-around to enable id 1000 usage for this script
if [[ $UBUNTU_VERSION == 24.04 ]]; then
    # touch is used to disable harmless error message
    touch /var/mail/ubuntu && chown ubuntu /var/mail/ubuntu && userdel -r ubuntu
fi

# Mirror jenkins user in container
# jenkins user as ec2-user should have the same user-id
echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
@ -24,10 +24,10 @@ function install_ubuntu() {
|
||||
| tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
|
||||
# To add the online network network package repository for the Intel Support Packages
|
||||
wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
|
||||
| gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg.gpg] \
|
||||
https://apt.repos.intel.com/${XPU_REPO_NAME} all main" \
|
||||
| tee /etc/apt/sources.list.d/oneAPI.list
|
||||
| gpg --dearmor > /usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg
|
||||
echo "deb [signed-by=/usr/share/keyrings/intel-for-pytorch-gpu-dev-keyring.gpg] \
|
||||
https://apt.repos.intel.com/intel-for-pytorch-gpu-dev all main" \
|
||||
| tee /etc/apt/sources.list.d/intel-for-pytorch-gpu-dev.list
|
||||
|
||||
# Update the packages list and repository index
|
||||
apt-get update
|
||||
@ -41,13 +41,14 @@ function install_ubuntu() {
|
||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
||||
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||
apt-get install -y intel-ocloc
|
||||
fi
|
||||
# Development Packages
|
||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||
# Install Intel Support Packages
|
||||
apt-get install -y ${XPU_PACKAGES}
|
||||
if [ -n "$XPU_VERSION" ]; then
|
||||
apt-get install -y intel-for-pytorch-gpu-dev-${XPU_VERSION} intel-pti-dev
|
||||
else
|
||||
apt-get install -y intel-for-pytorch-gpu-dev intel-pti-dev
|
||||
fi
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
@ -57,13 +58,13 @@ function install_ubuntu() {
|
||||
function install_rhel() {
|
||||
. /etc/os-release
|
||||
if [[ "${ID}" == "rhel" ]]; then
|
||||
if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
||||
if [[ ! " 8.6 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
||||
echo "RHEL version ${VERSION_ID} not supported"
|
||||
exit
|
||||
fi
|
||||
elif [[ "${ID}" == "almalinux" ]]; then
|
||||
# Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64
|
||||
VERSION_ID="8.8"
|
||||
VERSION_ID="8.6"
|
||||
fi
|
||||
|
||||
dnf install -y 'dnf-command(config-manager)'
|
||||
@ -71,18 +72,16 @@ function install_rhel() {
|
||||
dnf config-manager --add-repo \
|
||||
https://repositories.intel.com/gpu/rhel/${VERSION_ID}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_ID}.repo
|
||||
# To add the online network network package repository for the Intel Support Packages
|
||||
tee > /etc/yum.repos.d/oneAPI.repo << EOF
|
||||
[oneAPI]
|
||||
tee > /etc/yum.repos.d/intel-for-pytorch-gpu-dev.repo << EOF
|
||||
[intel-for-pytorch-gpu-dev]
|
||||
name=Intel for Pytorch GPU dev repository
|
||||
baseurl=https://yum.repos.intel.com/${XPU_REPO_NAME}
|
||||
baseurl=https://yum.repos.intel.com/intel-for-pytorch-gpu-dev
|
||||
enabled=1
|
||||
gpgcheck=1
|
||||
repo_gpgcheck=1
|
||||
gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
EOF
|
||||
|
||||
# Install Intel Support Packages
|
||||
yum install -y ${XPU_PACKAGES}
|
||||
# The xpu-smi packages
|
||||
dnf install -y xpu-smi
|
||||
# Compute and Media Runtimes
|
||||
@ -97,6 +96,8 @@ EOF
|
||||
dnf install -y --refresh \
|
||||
intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel \
|
||||
level-zero-devel
|
||||
# Install Intel Support Packages
|
||||
yum install -y intel-for-pytorch-gpu-dev intel-pti-dev
|
||||
|
||||
# Cleanup
|
||||
dnf clean all
|
||||
@ -118,7 +119,7 @@ function install_sles() {
|
||||
https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo
|
||||
rpm --import https://repositories.intel.com/gpu/intel-graphics.key
|
||||
# To add the online network network package repository for the Intel Support Packages
|
||||
zypper addrepo https://yum.repos.intel.com/${XPU_REPO_NAME} oneAPI
|
||||
zypper addrepo https://yum.repos.intel.com/intel-for-pytorch-gpu-dev intel-for-pytorch-gpu-dev
|
||||
rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
||||
|
||||
# The xpu-smi packages
|
||||
@ -130,7 +131,7 @@ function install_sles() {
|
||||
zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
|
||||
|
||||
# Install Intel Support Packages
|
||||
zypper install -y ${XPU_PACKAGES}
|
||||
zypper install -y intel-for-pytorch-gpu-dev intel-pti-dev
|
||||
|
||||
}
|
||||
|
||||
@ -141,13 +142,6 @@ if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||
XPU_DRIVER_VERSION=""
|
||||
fi
|
||||
|
||||
XPU_REPO_NAME="intel-for-pytorch-gpu-dev"
|
||||
XPU_PACKAGES="intel-for-pytorch-gpu-dev-0.5 intel-pti-dev-0.9"
|
||||
if [[ "$XPU_VERSION" == "2025.0" ]]; then
|
||||
XPU_REPO_NAME="oneapi"
|
||||
XPU_PACKAGES="intel-deep-learning-essentials-2025.0"
|
||||
fi
|
||||
|
||||
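A minimal sketch of how these variables expand once the 2025.0 branch above is taken (the repo URL and package name come straight from the script; the keyring path matches the Ubuntu block earlier, and the rest is illustrative):

```bash
# Assuming XPU_VERSION="2025.0", the branch above yields:
XPU_REPO_NAME="oneapi"
XPU_PACKAGES="intel-deep-learning-essentials-2025.0"
# install_ubuntu() then registers and installs from:
#   deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main
apt-get install -y ${XPU_PACKAGES}
```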
# The installation depends on the base OS
ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in

@@ -1,39 +1,47 @@
ARG CUDA_VERSION=12.4
ARG CUDA_VERSION=10.2
ARG BASE_TARGET=cuda${CUDA_VERSION}
FROM amd64/almalinux:8 as base
FROM centos:7 as base

ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

ARG DEVTOOLSET_VERSION=11

ENV LC_ALL en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LANGUAGE en_US.UTF-8

RUN yum -y update
RUN yum -y install epel-release
RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
ARG DEVTOOLSET_VERSION=9
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum update -y
RUN yum install -y wget curl perl util-linux xz bzip2 git patch which unzip
# Just add everything as a safe.directory for git since these will be used in multiple places with git
RUN git config --global --add safe.directory '*'
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
RUN yum install -y yum-utils centos-release-scl
RUN yum-config-manager --enable rhel-server-rhscl-7-rpms
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo
RUN yum install -y devtoolset-${DEVTOOLSET_VERSION}-gcc devtoolset-${DEVTOOLSET_VERSION}-gcc-c++ devtoolset-${DEVTOOLSET_VERSION}-gcc-gfortran devtoolset-${DEVTOOLSET_VERSION}-binutils
# EPEL for cmake
RUN yum --enablerepo=extras install -y epel-release

# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -s /usr/local/bin/cmake /usr/bin/cmake3
# cmake
RUN yum install -y cmake3 && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ENV PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/devtoolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH

RUN yum install -y autoconf aclocal automake make sudo
RUN rm -rf /usr/local/cuda-*

FROM base as openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh

FROM base as patchelf
# Install patchelf
ADD ./common/install_patchelf.sh install_patchelf.sh
RUN bash ./install_patchelf.sh && rm install_patchelf.sh && cp $(which patchelf) /patchelf

FROM base as openssl
# Install openssl
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh

FROM base as conda
# Install Anaconda
ADD ./common/install_conda_docker.sh install_conda.sh
@@ -41,7 +49,7 @@ RUN bash ./install_conda.sh && rm install_conda.sh

# Install CUDA
FROM base as cuda
ARG CUDA_VERSION=12.4
ARG CUDA_VERSION=10.2
RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
@@ -62,10 +70,6 @@ FROM cuda as cuda12.4
RUN bash ./install_cuda.sh 12.4
ENV DESIRED_CUDA=12.4

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
ENV DESIRED_CUDA=12.6

# Install MNIST test data
FROM base as mnist
ADD ./common/install_mnist.sh install_mnist.sh
@@ -75,7 +79,6 @@ FROM base as all_cuda
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
COPY --from=cuda12.1 /usr/local/cuda-12.1 /usr/local/cuda-12.1
COPY --from=cuda12.4 /usr/local/cuda-12.4 /usr/local/cuda-12.4
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6

# Final step
FROM ${BASE_TARGET} as final
@@ -88,8 +91,7 @@ COPY ./common/install_jni.sh install_jni.sh
COPY ./java/jni.h jni.h
RUN bash ./install_jni.sh && rm install_jni.sh

ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
ENV PATH /opt/conda/bin:$PATH
COPY --from=mnist /usr/local/mnist /usr/local/mnist
RUN rm -rf /usr/local/cuda
RUN chmod o+rw /usr/local
@@ -48,10 +48,10 @@ esac
--progress plain \
--build-arg "BASE_TARGET=${BASE_TARGET}" \
--build-arg "CUDA_VERSION=${CUDA_VERSION}" \
--build-arg "DEVTOOLSET_VERSION=11" \
--build-arg "DEVTOOLSET_VERSION=9" \
-t ${DOCKER_IMAGE_NAME} \
$@ \
-f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
-f "${TOPDIR}/.ci/docker/conda/Dockerfile" \
${TOPDIR}/.ci/docker/
)

@@ -66,11 +66,6 @@ RUN bash ./install_cuda.sh 12.4
RUN bash ./install_magma.sh 12.4
RUN ln -sf /usr/local/cuda-12.4 /usr/local/cuda

FROM cuda as cuda12.6
RUN bash ./install_cuda.sh 12.6
RUN bash ./install_magma.sh 12.6
RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda

FROM cpu as rocm
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}

@@ -144,10 +144,6 @@ COPY --from=libpng /usr/local/lib/pkgconfig /usr/local/
FROM common as cpu_final
ARG BASE_CUDA_VERSION=10.1
ARG DEVTOOLSET_VERSION=9
# Install Anaconda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
ENV PATH /opt/conda/bin:$PATH
RUN sed -i s/mirror.centos.org/vault.centos.org/g /etc/yum.repos.d/*.repo
RUN sed -i s/^#.*baseurl=http/baseurl=http/g /etc/yum.repos.d/*.repo
RUN sed -i s/^mirrorlist=http/#mirrorlist=http/g /etc/yum.repos.d/*.repo

@@ -1,4 +1,5 @@
# syntax = docker/dockerfile:experimental
ARG ROCM_VERSION=3.7
ARG BASE_CUDA_VERSION=11.8
ARG GPU_IMAGE=amd64/almalinux:8
FROM quay.io/pypa/manylinux_2_28_x86_64 as base
@@ -116,49 +117,30 @@ COPY --from=jni /usr/local/include/jni.h /usr/local/
FROM common as cpu_final
ARG BASE_CUDA_VERSION=11.8
ARG DEVTOOLSET_VERSION=11
# Install Anaconda
ADD ./common/install_conda_docker.sh install_conda.sh
RUN bash ./install_conda.sh && rm install_conda.sh
ENV PATH /opt/conda/bin:$PATH
# Ensure the expected devtoolset is used
ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
# Install setuptools and wheel for python 3.12/3.13
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
done;


# cmake-3.18.4 from pip; force in case cmake3 already exists
# cmake-3.18.4 from pip
RUN yum install -y python3-pip && \
python3 -mpip install cmake==3.18.4 && \
ln -sf /usr/local/bin/cmake /usr/bin/cmake3
ln -s /usr/local/bin/cmake /usr/bin/cmake3

FROM cpu_final as cuda_final
RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=cuda /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
COPY --from=magma /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda-${BASE_CUDA_VERSION}
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
ENV PATH=/usr/local/cuda/bin:$PATH

FROM cpu_final as rocm_final
ARG ROCM_VERSION=6.0
ARG PYTORCH_ROCM_ARCH
ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
ARG DEVTOOLSET_VERSION=11
ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
# below workaround helps avoid error
ENV ROCM_PATH /opt/rocm
# cmake-3.28.4 from pip to get enable_language(HIP)
# and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker
RUN python3 -m pip install --upgrade pip && \
python3 -mpip install cmake==3.28.4
ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
ENV MKLROOT /opt/intel
ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
RUN bash ./install_rocm_magma.sh && rm install_rocm_magma.sh
FROM common as rocm_final
ARG ROCM_VERSION=3.7
# Install ROCm
ADD ./common/install_rocm.sh install_rocm.sh
RUN bash ./install_rocm.sh ${ROCM_VERSION} && rm install_rocm.sh
# cmake is already installed inside the rocm base image, but both 2 and 3 exist
# cmake3 is needed for the later MIOpen custom build, so that step is last.
RUN yum install -y cmake3 && \
rm -f /usr/bin/cmake && \
ln -s /usr/bin/cmake3 /usr/bin/cmake
ADD ./common/install_miopen.sh install_miopen.sh
RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh

@@ -168,7 +150,8 @@ ENV XPU_DRIVER_TYPE ROLLING
# cmake-3.28.4 from pip
RUN python3 -m pip install --upgrade pip && \
python3 -mpip install cmake==3.28.4
# Install setuptools and wheel for python 3.13
RUN /opt/python/cp313-cp313/bin/python -m pip install setuptools wheel
ADD ./common/install_xpu.sh install_xpu.sh
ENV XPU_VERSION 2025.0
RUN bash ./install_xpu.sh && rm install_xpu.sh
RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd

@@ -48,11 +48,6 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/op
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"

FROM base as openblas
# Install openblas
ADD ./common/install_openblas.sh install_openblas.sh
RUN bash ./install_openblas.sh && rm install_openblas.sh

FROM base as final

# remove unnecessary python versions
@@ -60,5 +55,3 @@ RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH

@@ -61,7 +61,7 @@ RUN git config --global --add safe.directory "*"
# NOTE: Need a better way to get this library as Ubuntu's package can be removed by the vendor, or changed
###############################################################################
RUN cd ~/ \
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-4ubuntu2_arm64.deb \
&& curl -L -o ~/libgfortran-10-dev.deb http://ports.ubuntu.com/ubuntu-ports/pool/universe/g/gcc-10/libgfortran-10-dev_10.5.0-1ubuntu1_arm64.deb \
&& ar x ~/libgfortran-10-dev.deb \
&& tar --use-compress-program=unzstd -xvf data.tar.zst -C ~/ \
&& cp -f ~/usr/lib/gcc/aarch64-linux-gnu/10/libgfortran.a /opt/rh/devtoolset-10/root/usr/lib/gcc/aarch64-redhat-linux/10/

@@ -1,20 +1,17 @@
FROM quay.io/pypa/manylinux_2_28_s390x as base
FROM --platform=linux/s390x docker.io/ubuntu:24.04 as base

# Language variables
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV LANGUAGE=C.UTF-8

ARG DEVTOOLSET_VERSION=13
# Install needed OS packages. This is to support all
# the binary builds (torch, vision, audio, text, data)
RUN yum -y install epel-release
RUN yum -y update
RUN yum install -y \
sudo \
RUN apt update ; apt upgrade -y
RUN apt install -y \
build-essential \
autoconf \
automake \
bison \
bzip2 \
curl \
diffutils \
@@ -27,40 +24,19 @@ RUN yum install -y \
util-linux \
wget \
which \
xz \
yasm \
xz-utils \
less \
zstd \
libgomp \
gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
gcc-toolset-${DEVTOOLSET_VERSION}-binutils \
gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
cmake \
rust \
cargo \
llvm-devel \
libzstd-devel \
python3.12-devel \
python3.12-setuptools \
python3.12-pip \
python3-virtualenv \
python3.12-pyyaml \
python3.12-numpy \
python3.12-wheel \
python3.12-cryptography \
blas-devel \
openblas-devel \
lapack-devel \
atlas-devel \
libjpeg-devel \
libxslt-devel \
libxml2-devel \
openssl-devel \
valgrind

ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
python3 \
python3-dev \
python3-setuptools \
python3-yaml \
python3-typing-extensions \
libblas-dev \
libopenblas-dev \
liblapack-dev \
libatlas-base-dev

# git236+ would refuse to run git commands in repos owned by other users
# Which causes version check to fail, as pytorch repo is bind-mounted into the image
@@ -68,8 +44,14 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
RUN git config --global --add safe.directory "*"

# installed python doesn't have development parts. Rebuild it from scratch
RUN /bin/rm -rf /opt/_internal /opt/python /usr/local/*/*
FROM base as openssl
# Install openssl (this must precede `build python` step)
# (In order to have a proper SSL module, Python is compiled
# against a recent openssl [see env vars above], which is linked
# statically. We delete openssl afterwards.)
ADD ./common/install_openssl.sh install_openssl.sh
RUN bash ./install_openssl.sh && rm install_openssl.sh
ENV SSL_CERT_FILE=/opt/_internal/certs.pem

# EPEL for cmake
FROM base as patchelf
@@ -82,43 +64,10 @@ FROM patchelf as python
# build python
COPY manywheel/build_scripts /build_scripts
ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
ENV SSL_CERT_FILE=
RUN bash build_scripts/build.sh && rm -r build_scripts

FROM base as final
FROM openssl as final
COPY --from=python /opt/python /opt/python
COPY --from=python /opt/_internal /opt/_internal
COPY --from=python /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=python /opt/python/cp39-cp39/bin/auditwheel /usr/local/bin/auditwheel
COPY --from=patchelf /usr/local/bin/patchelf /usr/local/bin/patchelf

RUN alternatives --set python /usr/bin/python3.12
RUN alternatives --set python3 /usr/bin/python3.12

RUN pip-3.12 install typing_extensions

ENTRYPOINT []
CMD ["/bin/bash"]

# install test dependencies:
# - grpcio requires system openssl, bundled crypto fails to build
# - ml_dtypes 0.4.0 requires some fixes provided in later commits to build
RUN dnf install -y \
protobuf-devel \
protobuf-c-devel \
protobuf-lite-devel \
wget \
patch

RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio==1.65.4
RUN cd ~ && \
git clone https://github.com/jax-ml/ml_dtypes && \
cd ml_dtypes && \
git checkout v0.4.0 && \
git submodule update --init --recursive && \
wget https://github.com/jax-ml/ml_dtypes/commit/b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
wget https://github.com/jax-ml/ml_dtypes/commit/d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
patch -p1 < b969f76914d6b30676721bc92bf0f6021a0d1321.patch && \
patch -p1 < d4e6d035ecda073eab8bcf60f4eef572ee7087e6.patch && \
python3 setup.py bdist_wheel && \
pip3 install dist/*.whl && \
rm -rf ml_dtypes

@@ -61,7 +61,7 @@ case ${GPU_ARCH_TYPE} in
cpu-s390x)
TARGET=final
DOCKER_TAG=cpu-s390x
GPU_IMAGE=s390x/almalinux:8
GPU_IMAGE=redhat/ubi9
DOCKER_GPU_BUILD_ARG=""
MANY_LINUX_VERSION="s390x"
;;
@@ -87,17 +87,11 @@ case ${GPU_ARCH_TYPE} in
MANY_LINUX_VERSION="aarch64"
DOCKERFILE_SUFFIX="_cuda_aarch64"
;;
rocm|rocm-manylinux_2_28)
rocm)
TARGET=rocm_final
DOCKER_TAG=rocm${GPU_ARCH_VERSION}
GPU_IMAGE=rocm/dev-centos-7:${GPU_ARCH_VERSION}-complete
DEVTOOLSET_VERSION="9"
if [ ${GPU_ARCH_TYPE} == "rocm-manylinux_2_28" ]; then
MANY_LINUX_VERSION="2_28"
DEVTOOLSET_VERSION="11"
GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
fi
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100"
PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx1030;gfx1100"
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
@@ -105,7 +99,10 @@ case ${GPU_ARCH_TYPE} in
echo "ERROR: rocm regex failed"
exit 1
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
if [[ $ROCM_VERSION_INT -ge 60000 ]]; then
PYTORCH_ROCM_ARCH+=";gfx942"
fi
DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=9"
;;
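To make the version arithmetic in the rocm branch concrete, here is a standalone sketch of the same computation (the sample version string is illustrative):

```bash
#!/usr/bin/env bash
# Fold "major.minor[.patch]" into a single integer so versions compare with -ge
GPU_ARCH_VERSION="6.1.2"   # illustrative value
ROCM_REGEX="([0-9]+)\.([0-9]+)[\.]?([0-9]*)"
if [[ $GPU_ARCH_VERSION =~ $ROCM_REGEX ]]; then
    ROCM_VERSION_INT=$((${BASH_REMATCH[1]}*10000 + ${BASH_REMATCH[2]}*100 + ${BASH_REMATCH[3]:-0}))
fi
echo "$ROCM_VERSION_INT"   # prints 60102; any ROCm 6.0+ value is >= 60000, so gfx942 gets appended
```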
xpu)
TARGET=xpu_final
@@ -128,13 +125,11 @@ fi
(
set -x

if [ "$(uname -m)" != "s390x" ]; then
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker
fi
# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
sudo systemctl daemon-reload
sudo systemctl restart docker

DOCKER_BUILDKIT=1 docker build \
${DOCKER_GPU_BUILD_ARG} \

@@ -16,27 +16,37 @@ CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131
AUTOCONF_ROOT=autoconf-2.69
AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969

# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel"

if [ "$(uname -m)" != "s390x" ] ; then
PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} db4-devel"
else
PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} libdb-devel"
fi

# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"

# Get build utilities
MY_DIR=$(dirname "${BASH_SOURCE[0]}")
source $MY_DIR/build_utils.sh

# Development tools and libraries
yum -y install bzip2 make git patch unzip bison yasm diffutils \
automake which file \
${PYTHON_COMPILE_DEPS}
if [ "$(uname -m)" != "s390x" ] ; then
# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel db4-devel libpcap-devel xz-devel libffi-devel"

# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"

# Development tools and libraries
yum -y install bzip2 make git patch unzip bison yasm diffutils \
automake which file cmake28 \
kernel-devel-`uname -r` \
${PYTHON_COMPILE_DEPS}
else
# Dependencies for compiling Python that we want to remove from
# the final image after compiling Python
PYTHON_COMPILE_DEPS="zlib1g-dev libbz2-dev libncurses-dev libsqlite3-dev libdb-dev libpcap-dev liblzma-dev libffi-dev"

# Libraries that are allowed as part of the manylinux1 profile
MANYLINUX1_DEPS="libglib2.0-dev libX11-dev libncurses-dev"

# Development tools and libraries
apt install -y bzip2 make git patch unzip diffutils \
automake which file cmake \
linux-headers-virtual \
${PYTHON_COMPILE_DEPS}
fi

# Install newest autoconf
build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
@@ -82,13 +92,16 @@ ln -s $PY39_BIN/auditwheel /usr/local/bin/auditwheel

# Clean up development headers and other unnecessary stuff for
# final image
yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
avahi freetype bitstream-vera-fonts \
${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
yum -y install ${MANYLINUX1_DEPS}
yum -y clean all > /dev/null 2>&1
yum list installed

if [ "$(uname -m)" != "s390x" ] ; then
yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
avahi freetype bitstream-vera-fonts \
${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
yum -y install ${MANYLINUX1_DEPS}
yum -y clean all > /dev/null 2>&1
yum list installed
else
apt purge -y ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
fi
# we don't need libpython*.a, and they're many megabytes
find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
# Strip what we can -- and ignore errors, because this just attempts to strip

@@ -1,12 +1,10 @@
# cf. https://github.com/pypa/manylinux/issues/53

import sys
from urllib.request import urlopen


GOOD_SSL = "https://google.com"
BAD_SSL = "https://self-signed.badssl.com"

import sys


print("Testing SSL certificate checking for Python:", sys.version)

@@ -14,8 +12,14 @@ if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
print("This version never checks SSL certs; skipping tests")
sys.exit(0)

if sys.version_info[0] >= 3:
from urllib.request import urlopen

EXC = OSError
EXC = OSError
else:
from urllib import urlopen

EXC = IOError

print(f"Connecting to {GOOD_SSL} should work")
urlopen(GOOD_SSL)

@@ -5,7 +5,7 @@
#Pinned versions: 1.6
#test that import:

boto3==1.35.42
boto3==1.19.12
#Description: AWS SDK for python
#Pinned versions: 1.19.12, 1.16.34
#test that import:
@@ -36,7 +36,7 @@ expecttest==0.2.1
#Pinned versions: 0.2.1
#test that import:

fbscribelogger==0.1.7
fbscribelogger==0.1.6
#Description: write to scribe from authenticated jobs on CI
#Pinned versions: 0.1.6
#test that import:
@@ -118,7 +118,7 @@ numba==0.55.2 ; python_version == "3.10"

#numpy
#Description: Provides N-dimensional arrays and linear algebra
#Pinned versions: 1.26.2
#Pinned versions: 1.20
#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
@@ -128,9 +128,6 @@ numba==0.55.2 ; python_version == "3.10"
#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
#test_binary_ufuncs.py
numpy==1.22.4; python_version == "3.9" or python_version == "3.10"
numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
numpy==2.1.2; python_version >= "3.13"

#onnxruntime
#Description: scoring engine for Open Neural Network Exchange (ONNX) models
@@ -142,9 +139,9 @@ opt-einsum==3.3
#Pinned versions: 3.3
#test that import: test_linalg.py

optree==0.13.0
optree==0.12.1
#Description: A library for tree manipulation
#Pinned versions: 0.13.0
#Pinned versions: 0.12.1
#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
@@ -256,7 +253,7 @@ tb-nightly==2.13.0a20230426
#test that import:

# needed by torchgen utils
typing-extensions>=4.10.0
typing-extensions
#Description: type hints for python
#Pinned versions:
#test that import:
@@ -281,6 +278,11 @@ redis>=4.0.0
#Description: redis database
#test that import: anything that tests OSS caching/mocking (inductor/test_codecache.py, inductor/test_max_autotune.py)

rockset==1.0.3
#Description: queries Rockset
#Pinned versions: 1.0.3
#test that import:

ghstack==0.8.0
#Description: ghstack tool
#Pinned versions: 0.8.0
@@ -320,12 +322,13 @@ lxml==5.0.0

PyGithub==2.3.0

sympy==1.12.1 ; python_version == "3.8"
sympy==1.13.1 ; python_version >= "3.9"
#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
#Pinned versions:
#test that import:

onnx==1.17.0
onnx==1.16.1
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
#Pinned versions:
#test that import:
@@ -339,26 +342,3 @@ parameterized==0.8.1
#Description: Parameterizes unittests, both the tests themselves and the entire testing class
#Pinned versions:
#test that import:

#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 1.24.0
#test that import: test_sac_estimator.py

pwlf==2.2.1 ; python_version >= "3.8"
#Description: required for testing torch/distributed/_tools/sac_estimator.py
#Pinned versions: 2.2.1
#test that import: test_sac_estimator.py


# To build PyTorch itself
astunparse
PyYAML
setuptools

ninja==1.11.1 ; platform_machine == "aarch64"
scons==4.5.2 ; platform_machine == "aarch64"

pulp==2.9.0 ; python_version >= "3.8"
#Description: required for testing ilp formulation under torch/distributed/_tools
#Pinned versions: 2.9.0
#test that import: test_sac_ilp.py

@@ -1 +1 @@
3.2.0
3.1.0

@@ -87,6 +87,19 @@ RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
ENV INSTALLED_VISION ${VISION}

# (optional) Install Android NDK
ARG ANDROID
ARG ANDROID_NDK
ARG GRADLE_VERSION
COPY ./common/install_android.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
COPY ./android/AndroidManifest.xml AndroidManifest.xml
COPY ./android/build.gradle build.gradle
RUN if [ -n "${ANDROID}" ]; then bash ./install_android.sh; fi
RUN rm install_android.sh cache_vision_models.sh common_utils.sh
RUN rm AndroidManifest.xml
RUN rm build.gradle
ENV INSTALLED_ANDROID ${ANDROID}

# (optional) Install Vulkan SDK
ARG VULKAN_SDK_VERSION
COPY ./common/install_vulkan_sdk.sh install_vulkan_sdk.sh

@@ -1,10 +0,0 @@
#!/usr/bin/env bash

# This is mostly just a shim to manywheel/build.sh
# TODO: Make this a dedicated script to build just libtorch

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh

.ci/magma/.gitignore (vendored)
@@ -1,2 +0,0 @@
output/
magma-cuda*/
@@ -1,48 +0,0 @@
SHELL=/usr/bin/env bash

DOCKER_CMD ?= docker
DESIRED_CUDA ?= 11.8
DESIRED_CUDA_SHORT = $(subst .,,$(DESIRED_CUDA))
PACKAGE_NAME = magma-cuda
CUDA_ARCH_LIST ?= -gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90

DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
-v $(shell git rev-parse --show-toplevel)/.ci:/builder \
-w /builder \
-e PACKAGE_NAME=${PACKAGE_NAME}${DESIRED_CUDA_SHORT} \
-e DESIRED_CUDA=${DESIRED_CUDA} \
-e CUDA_ARCH_LIST="${CUDA_ARCH_LIST}" \
"pytorch/manylinux-builder:cuda${DESIRED_CUDA}-main" \
magma/build_magma.sh

.PHONY: all
all: magma-cuda126
all: magma-cuda124
all: magma-cuda121
all: magma-cuda118

.PHONY:
clean:
$(RM) -r magma-*
$(RM) -r output

.PHONY: magma-cuda126
magma-cuda126: DESIRED_CUDA := 12.6
magma-cuda126:
$(DOCKER_RUN)

.PHONY: magma-cuda124
magma-cuda124: DESIRED_CUDA := 12.4
magma-cuda124:
$(DOCKER_RUN)

.PHONY: magma-cuda121
magma-cuda121: DESIRED_CUDA := 12.1
magma-cuda121:
$(DOCKER_RUN)

.PHONY: magma-cuda118
magma-cuda118: DESIRED_CUDA := 11.8
magma-cuda118: CUDA_ARCH_LIST += -gencode arch=compute_37,code=sm_37
magma-cuda118:
$(DOCKER_RUN)
@@ -1,50 +0,0 @@
# Magma

This folder contains the scripts and configurations to build magma, statically linked for various versions of CUDA.

## Building

Look in the `Makefile` for available targets to build. To build any target, for example `magma-cuda118`, run

```
# Using `docker`
make magma-cuda118

# Using `podman`
DOCKER_CMD=podman make magma-cuda118
```

This spawns a `pytorch/manylinux-cuda<version>` docker image, which has the required `devtoolset` and CUDA versions installed.
Within the docker image, it runs `build_magma.sh` with the correct environment variables set, which packages the necessary files
into a tarball, with the following structure:

```
.
├── include # header files
├── lib # libmagma.a
├── info
│ ├── licenses # license file
│ └── recipe # build script and patches
```

More specifically, `build_magma.sh` copies over the relevant files from the `package_files` directory depending on the CUDA version.
The resulting binaries end up in the `output` folder.


## Pushing

Packages can be uploaded to an S3 bucket using:

```
aws s3 cp output/*/magma-cuda*.bz2 <bucket-with-path>
```

If you do not have upload permissions, please ping @seemethere or @soumith to gain access.

## New versions

New CUDA versions can be added by creating a new make target with the next desired version; see the sketch after this section. For CUDA version NN.n, the target should be named `magma-cudaNNn`.

Make sure to edit the appropriate environment variables (e.g., DESIRED_CUDA, CUDA_ARCH_LIST) in the `Makefile` accordingly. Remember also to check `build_magma.sh` to ensure the logic for copying over the files remains correct.

New patches can be added by editing `Makefile` and `build_magma.sh` the same way `getrf_nbparam.patch` is implemented.
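As a minimal sketch of that pattern, a hypothetical `magma-cuda128` target (the 12.8 version number is a placeholder, not a released configuration) would mirror the existing targets:

```makefile
# Hypothetical new target; bump DESIRED_CUDA (and CUDA_ARCH_LIST if new archs are needed)
.PHONY: magma-cuda128
magma-cuda128: DESIRED_CUDA := 12.8
magma-cuda128:
	$(DOCKER_RUN)
```

If the new version should build by default, also add a matching `all: magma-cuda128` line next to the other `all:` prerequisites.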
@@ -1,50 +0,0 @@
#!/usr/bin/env bash

set -eou pipefail

# Environment variables
# The script expects DESIRED_CUDA and PACKAGE_NAME to be set
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
MAGMA_VERSION=2.6.1

# Folders for the build
PACKAGE_FILES=${ROOT_DIR}/magma/package_files # source patches and metadata
PACKAGE_DIR=${ROOT_DIR}/magma/${PACKAGE_NAME} # build workspace
PACKAGE_OUTPUT=${ROOT_DIR}/magma/output # where tarballs are stored
PACKAGE_BUILD=${PACKAGE_DIR}/build # where the content of the tarball is prepared
PACKAGE_RECIPE=${PACKAGE_BUILD}/info/recipe
PACKAGE_LICENSE=${PACKAGE_BUILD}/info/licenses
mkdir -p ${PACKAGE_DIR} ${PACKAGE_OUTPUT}/linux-64 ${PACKAGE_BUILD} ${PACKAGE_RECIPE} ${PACKAGE_LICENSE}

# Fetch magma sources and verify checksum
pushd ${PACKAGE_DIR}
curl -LO http://icl.utk.edu/projectsfiles/magma/downloads/magma-${MAGMA_VERSION}.tar.gz
tar zxf magma-${MAGMA_VERSION}.tar.gz
sha256sum --check < ${PACKAGE_FILES}/magma-${MAGMA_VERSION}.sha256
popd

# Apply patches and build
pushd ${PACKAGE_DIR}/magma-${MAGMA_VERSION}
patch < ${PACKAGE_FILES}/CMake.patch
patch < ${PACKAGE_FILES}/cmakelists.patch
patch -p0 < ${PACKAGE_FILES}/thread_queue.patch
patch -p1 < ${PACKAGE_FILES}/getrf_shfl.patch
patch -p1 < ${PACKAGE_FILES}/getrf_nbparam.patch
# The build.sh script expects to be executed from the sources root folder
INSTALL_DIR=${PACKAGE_BUILD} ${PACKAGE_FILES}/build.sh
popd

# Package recipe, license and tarball
# Folder and package name are backward compatible for the build workflow
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
cp ${PACKAGE_FILES}/thread_queue.patch ${PACKAGE_RECIPE}/thread_queue.patch
cp ${PACKAGE_FILES}/cmakelists.patch ${PACKAGE_RECIPE}/cmakelists.patch
cp ${PACKAGE_FILES}/getrf_shfl.patch ${PACKAGE_RECIPE}/getrf_shfl.patch
cp ${PACKAGE_FILES}/getrf_nbparam.patch ${PACKAGE_RECIPE}/getrf_nbparam.patch
cp ${PACKAGE_FILES}/CMake.patch ${PACKAGE_RECIPE}/CMake.patch
cp ${PACKAGE_FILES}/magma-${MAGMA_VERSION}.sha256 ${PACKAGE_RECIPE}/magma-${MAGMA_VERSION}.sha256
cp ${PACKAGE_DIR}/magma-${MAGMA_VERSION}/COPYRIGHT ${PACKAGE_LICENSE}/COPYRIGHT
pushd ${PACKAGE_BUILD}
tar cjf ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2 include lib info
echo Built in ${PACKAGE_OUTPUT}/linux-64/${PACKAGE_NAME}-${MAGMA_VERSION}-1.tar.bz2
popd
@@ -1,40 +0,0 @@
--- CMake.src.cuda 2023-03-29 10:05:32.136954140 +0000
+++ CMake.src.cuda 2023-03-29 10:05:50.281318043 +0000
@@ -283,10 +283,10 @@
magmablas/zgeadd.cu
magmablas/zgeadd2.cu
magmablas/zgeam.cu
-magmablas/zgemm_fermi.cu
+#magmablas/zgemm_fermi.cu
magmablas/zgemm_reduce.cu
magmablas/zgemv_conj.cu
-magmablas/zgemv_fermi.cu
+#magmablas/zgemv_fermi.cu
magmablas/zgerbt.cu
magmablas/zgerbt_kernels.cu
magmablas/zgetmatrix_transpose.cpp
@@ -1009,18 +1009,18 @@
magmablas/sgeam.cu
magmablas/dgeam.cu
magmablas/cgeam.cu
-magmablas/sgemm_fermi.cu
-magmablas/dgemm_fermi.cu
-magmablas/cgemm_fermi.cu
+#magmablas/sgemm_fermi.cu
+#magmablas/dgemm_fermi.cu
+#magmablas/cgemm_fermi.cu
magmablas/sgemm_reduce.cu
magmablas/dgemm_reduce.cu
magmablas/cgemm_reduce.cu
magmablas/sgemv_conj.cu
magmablas/dgemv_conj.cu
magmablas/cgemv_conj.cu
-magmablas/sgemv_fermi.cu
-magmablas/dgemv_fermi.cu
-magmablas/cgemv_fermi.cu
+#magmablas/sgemv_fermi.cu
+#magmablas/dgemv_fermi.cu
+#magmablas/cgemv_fermi.cu
magmablas/sgerbt.cu
magmablas/dgerbt.cu
magmablas/cgerbt.cu
@@ -1,12 +0,0 @@
CUDA__VERSION=$(nvcc --version|sed -n 4p|cut -f5 -d" "|cut -f1 -d",")
if [ "$CUDA__VERSION" != "$DESIRED_CUDA" ]; then
echo "CUDA Version is not $DESIRED_CUDA. CUDA Version found: $CUDA__VERSION"
exit 1
fi

mkdir build
cd build
cmake .. -DUSE_FORTRAN=OFF -DGPU_TARGET="All" -DCMAKE_INSTALL_PREFIX="$INSTALL_DIR" -DCUDA_ARCH_LIST="$CUDA_ARCH_LIST"
make -j$(getconf _NPROCESSORS_CONF)
make install
cd ..
@@ -1,388 +0,0 @@
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d5d8d87d..8a507334 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -3,7 +3,7 @@ cmake_minimum_required( VERSION 2.8.1 )
# ----------------------------------------
# to disable Fortran, set this to "off"
# see also -DADD_ below
-option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" ON )
+option( USE_FORTRAN "Fortran is required for some tester checks, but can be disabled with reduced functionality" OFF )

if (USE_FORTRAN)
project( MAGMA C CXX Fortran )
@@ -75,6 +75,8 @@ else()
message( WARNING "The compiler ${CMAKE_CXX_COMPILER} doesn't support the -std=c++11 flag. Some code may not compile.")
endif()

+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -static-libstdc++ -fno-exceptions")
+
CHECK_C_COMPILER_FLAG("-std=c99" COMPILER_SUPPORTS_C99)
if (COMPILER_SUPPORTS_C99)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
@@ -101,15 +103,15 @@ endif()


# ----------------------------------------
-# locate OpenMP
-find_package( OpenMP )
-if (OPENMP_FOUND)
- message( STATUS "Found OpenMP" )
- message( STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}" )
- message( STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
- set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
- set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
-endif()
+# # locate OpenMP
+# find_package( OpenMP )
+# if (OPENMP_FOUND)
+# message( STATUS "Found OpenMP" )
+# message( STATUS " OpenMP_C_FLAGS ${OpenMP_C_FLAGS}" )
+# message( STATUS " OpenMP_CXX_FLAGS ${OpenMP_CXX_FLAGS}" )
+# set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}" )
+# set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}" )
+# endif()

if (MAGMA_ENABLE_CUDA)
# ----------------------------------------
@@ -132,7 +134,7 @@ if (MAGMA_ENABLE_CUDA)
set( NV_SM "" )
set( NV_COMP "" )

- set(CUDA_SEPARABLE_COMPILATION ON)
+ set(CUDA_SEPARABLE_COMPILATION OFF)

# nvcc >= 6.5 supports -std=c++11, so propagate CXXFLAGS to NVCCFLAGS.
# Older nvcc didn't support -std=c++11, so previously we disabled propagation.
@@ -294,11 +296,18 @@ if (MAGMA_ENABLE_CUDA)
message( STATUS " compile for CUDA arch 8.0 (Ampere)" )
endif()

+ if ( ${GPU_TARGET} MATCHES "All")
+ set( MIN_ARCH 370)
+ SET( NV_SM ${CUDA_ARCH_LIST})
+ SET( NV_COMP "")
+ endif()
+
if (NOT MIN_ARCH)
message( FATAL_ERROR "GPU_TARGET must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, or valid sm_[0-9][0-9]" )
endif()

- set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} )
+ set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC -std=c++11 ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} )
+ MESSAGE(STATUS "CUDA_NVCC_FLAGS: ${CUDA_NVCC_FLAGS}")
#add_definitions( "-DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" )
set(MAGMA_HAVE_CUDA "1")
set(MAGMA_CUDA_ARCH_MIN "${MIN_ARCH}")
@@ -413,7 +422,7 @@ set_property(CACHE BLA_VENDOR PROPERTY STRINGS
set( LAPACK_LIBRARIES "" CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" )
if (LAPACK_LIBRARIES STREQUAL "")
message( STATUS "Searching for BLAS and LAPACK. To override, set LAPACK_LIBRARIES using ccmake." )
- find_package( LAPACK )
+ # find_package( LAPACK )
# force showing updated LAPACK_LIBRARIES in ccmake / cmake-gui.
set( LAPACK_LIBRARIES ${LAPACK_LIBRARIES} CACHE STRING "Libraries for LAPACK and BLAS, to manually override search" FORCE )
else()
@@ -552,12 +561,12 @@ if (WIN32)
#message( "libmagma_all_f ${libmagma_all_f}" )

# on Windows, Fortran files aren't compiled if listed here...
- cuda_add_library( magma ${libmagma_all_cpp} )
+ cuda_add_library( magma STATIC ${libmagma_all_cpp} OPTIONS --compiler-options "-fPIC")
target_link_libraries( magma
${LAPACK_LIBRARIES}
${CUDA_CUDART_LIBRARY}
${CUDA_CUBLAS_LIBRARIES}
- ${CUDA_cusparse_LIBRARY}
+ # ${CUDA_cusparse_LIBRARY}
)

# no Fortran files at the moment (how to test libmagma_all_f is not empty?),
@@ -575,13 +584,13 @@ if (WIN32)
else()
# Unix doesn't seem to have a problem with mixing C, CUDA, and Fortran files
if (MAGMA_ENABLE_CUDA)
- cuda_add_library( magma ${libmagma_all} )
+ cuda_add_library( magma STATIC ${libmagma_all} OPTIONS --compiler-options "-fPIC")
target_link_libraries( magma
${blas_fix}
${LAPACK_LIBRARIES}
${CUDA_CUDART_LIBRARY}
${CUDA_CUBLAS_LIBRARIES}
- ${CUDA_cusparse_LIBRARY}
+ # ${CUDA_cusparse_LIBRARY}
)
else()
find_package( hipBLAS )
@@ -614,138 +623,139 @@ else()
endif()
endif()
add_custom_target( lib DEPENDS magma )
-
-
-# ----------------------------------------
-# compile lapacktest library
-# If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp
-# else, compile only C++ files, not Fortran files
-if (USE_FORTRAN)
- foreach( filename ${liblapacktest_all} )
- if (filename MATCHES "\\.(f|f90|F90)$")
- list( APPEND liblapacktest_all_f ${filename} )
- endif()
- endforeach()
- add_library( lapacktest ${liblapacktest_all_f} )
-else()
- # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
- foreach( filename ${liblapacktest_all} )
- if (filename MATCHES "\\.(c|cu|cpp)$")
- list( APPEND liblapacktest_all_cpp ${filename} )
- endif()
- endforeach()
- add_library( lapacktest ${liblapacktest_all_cpp} )
-endif()
-target_link_libraries( lapacktest
- ${blas_fix}
- ${LAPACK_LIBRARIES}
-)
-
-
-# ----------------------------------------
-# compile tester library
-add_library( tester ${libtest_all} )
-target_link_libraries( tester
- magma
- lapacktest
- ${blas_fix}
- ${LAPACK_LIBRARIES}
-)
+set_target_properties(magma PROPERTIES POSITION_INDEPENDENT_CODE ON)
+
+
+# # ----------------------------------------
+# # compile lapacktest library
+# # If use fortran, compile only Fortran files, not magma_[sdcz]_no_fortran.cpp
+# # else, compile only C++ files, not Fortran files
+# if (USE_FORTRAN)
+# foreach( filename ${liblapacktest_all} )
+# if (filename MATCHES "\\.(f|f90|F90)$")
+# list( APPEND liblapacktest_all_f ${filename} )
+# endif()
+# endforeach()
+# add_library( lapacktest ${liblapacktest_all_f} )
+# else()
+# # alternatively, use only C/C++/CUDA files, including magma_[sdcz]_no_fortran.cpp
+# foreach( filename ${liblapacktest_all} )
+# if (filename MATCHES "\\.(c|cu|cpp)$")
+# list( APPEND liblapacktest_all_cpp ${filename} )
+# endif()
+# endforeach()
+# add_library( lapacktest ${liblapacktest_all_cpp} )
+# endif()
+# target_link_libraries( lapacktest
+# ${blas_fix}
+# ${LAPACK_LIBRARIES}
+# )
+
+
+# # ----------------------------------------
+# # compile tester library
+# add_library( tester ${libtest_all} )
+# target_link_libraries( tester
+# magma
+# lapacktest
+# ${blas_fix}
+# ${LAPACK_LIBRARIES}
+# )


# ----------------------------------------
# compile MAGMA sparse library

# sparse doesn't have Fortran at the moment, so no need for above shenanigans
-if (MAGMA_ENABLE_CUDA)
- include_directories( sparse/include )
- include_directories( sparse/control )
-else()
- include_directories( sparse_hip/include )
- include_directories( sparse_hip/control )
-endif()
-include_directories( testing )
-
-if (MAGMA_ENABLE_CUDA)
- cuda_add_library( magma_sparse ${libsparse_all} )
- target_link_libraries( magma_sparse
- magma
- ${blas_fix}
- ${LAPACK_LIBRARIES}
- ${CUDA_CUDART_LIBRARY}
- ${CUDA_CUBLAS_LIBRARIES}
- ${CUDA_cusparse_LIBRARY}
- )
-else()
- add_library( magma_sparse ${libsparse_all} )
- target_link_libraries( magma_sparse
- magma
- ${blas_fix}
- ${LAPACK_LIBRARIES}
- hip::device
- roc::hipblas
- roc::hipsparse
- )
-endif()
-add_custom_target( sparse-lib DEPENDS magma_sparse )
-
-
-# ----------------------------------------
-# compile each tester
-
-# save testers to testing/
-# save tester lib files to testing_lib/ to avoid cluttering lib/
-set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
-set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
-set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
-
-# skip Fortran testers, which require an extra file from CUDA
-foreach( filename ${testing_all} )
- if (filename MATCHES "\\.(c|cu|cpp)$")
- list( APPEND testing_all_cpp ${filename} )
- endif()
-endforeach()
-foreach( TEST ${testing_all_cpp} )
- string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
- string( REGEX REPLACE "testing/" "" EXE ${EXE} )
- #message( "${TEST} --> ${EXE}" )
- add_executable( ${EXE} ${TEST} )
- target_link_libraries( ${EXE} tester lapacktest magma )
- list( APPEND testing ${EXE} )
-endforeach()
-add_custom_target( testing DEPENDS ${testing} )
-
-
-# ----------------------------------------
-# compile each sparse tester
-
-if (MAGMA_ENABLE_CUDA)
- set(SPARSE_TEST_DIR "sparse/testing")
-else()
- set(SPARSE_TEST_DIR "sparse_hip/testing")
-endif()
-
-
-set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" )
-cmake_policy( SET CMP0037 OLD)
-foreach( TEST ${sparse_testing_all} )
- string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
- string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} )
- #message( "${TEST} --> ${EXE}" )
- add_executable( ${EXE} ${TEST} )
- target_link_libraries( ${EXE} magma_sparse magma )
- list( APPEND sparse-testing ${EXE} )
-endforeach()
-add_custom_target( sparse-testing DEPENDS ${sparse-testing} )
+# if (MAGMA_ENABLE_CUDA)
+# include_directories( sparse/include )
+# include_directories( sparse/control )
+# else()
+# include_directories( sparse_hip/include )
+# include_directories( sparse_hip/control )
+# endif()
+# include_directories( testing )
+
+# if (MAGMA_ENABLE_CUDA)
+# cuda_add_library( magma_sparse ${libsparse_all} )
+# target_link_libraries( magma_sparse
+# magma
+# ${blas_fix}
+# ${LAPACK_LIBRARIES}
+# ${CUDA_CUDART_LIBRARY}
+# ${CUDA_CUBLAS_LIBRARIES}
+# ${CUDA_cusparse_LIBRARY}
+# )
+# else()
+# add_library( magma_sparse ${libsparse_all} )
+# target_link_libraries( magma_sparse
+# magma
+# ${blas_fix}
+# ${LAPACK_LIBRARIES}
+# hip::device
+# roc::hipblas
+# roc::hipsparse
+# )
+# endif()
+# add_custom_target( sparse-lib DEPENDS magma_sparse )
+
+
+# # ----------------------------------------
+# # compile each tester
+
+# # save testers to testing/
+# # save tester lib files to testing_lib/ to avoid cluttering lib/
+# set( CMAKE_RUNTIME_OUTPUT_DIRECTORY testing )
+# set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY testing_lib )
+# set( CMAKE_LIBRARY_OUTPUT_DIRECTORY testing_lib )
+
+# # skip Fortran testers, which require an extra file from CUDA
+# foreach( filename ${testing_all} )
+# if (filename MATCHES "\\.(c|cu|cpp)$")
+# list( APPEND testing_all_cpp ${filename} )
+# endif()
+# endforeach()
+# foreach( TEST ${testing_all_cpp} )
+# string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+# string( REGEX REPLACE "testing/" "" EXE ${EXE} )
+# #message( "${TEST} --> ${EXE}" )
+# add_executable( ${EXE} ${TEST} )
+# target_link_libraries( ${EXE} tester lapacktest magma )
+# list( APPEND testing ${EXE} )
+# endforeach()
+# add_custom_target( testing DEPENDS ${testing} )
+
+
+# # ----------------------------------------
+# # compile each sparse tester
+
+# if (MAGMA_ENABLE_CUDA)
+# set(SPARSE_TEST_DIR "sparse/testing")
+# else()
+# set(SPARSE_TEST_DIR "sparse_hip/testing")
+# endif()
+
+
+# set( CMAKE_RUNTIME_OUTPUT_DIRECTORY "${SPARSE_TEST_DIR}" )
+# cmake_policy( SET CMP0037 OLD)
+# foreach( TEST ${sparse_testing_all} )
+# string( REGEX REPLACE "\\.(cpp|f90|F90)" "" EXE ${TEST} )
+# string( REGEX REPLACE "${SPARSE_TEST_DIR}/" "" EXE ${EXE} )
+# #message( "${TEST} --> ${EXE}" )
+# add_executable( ${EXE} ${TEST} )
+# target_link_libraries( ${EXE} magma_sparse magma )
+# list( APPEND sparse-testing ${EXE} )
+# endforeach()
+# add_custom_target( sparse-testing DEPENDS ${sparse-testing} )


# ----------------------------------------
# what to install
-install( TARGETS magma magma_sparse ${blas_fix}
+install( TARGETS magma ${blas_fix}
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib )
-file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
+file( GLOB headers include/*.h "${CMAKE_BINARY_DIR}/include/*.h" )
if (USE_FORTRAN)
install( FILES ${headers} ${modules}
DESTINATION include )
@@ -769,9 +779,9 @@ else()
"${blas_fix_lib} ${LAPACK_LIBS} hip::device roc::hipblas roc::hipsparse" )
endif()
set( MAGMA_REQUIRED "" )
-configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY )
-install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
- DESTINATION lib/pkgconfig )
+# configure_file( "${pkgconfig}.in" "${pkgconfig}" @ONLY )
+# install( FILES "${CMAKE_BINARY_DIR}/${pkgconfig}"
+# DESTINATION lib/pkgconfig )

# ----------------------------------------
get_directory_property( compile_definitions COMPILE_DEFINITIONS )
@@ -1,40 +0,0 @@
diff --git a/control/get_batched_crossover.cpp b/control/get_batched_crossover.cpp
index 4ec57306..912f8608 100644
--- a/control/get_batched_crossover.cpp
+++ b/control/get_batched_crossover.cpp
@@ -119,7 +119,7 @@ void magma_get_spotrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_
void magma_get_zgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb)
{
*nb = 64;
- *recnb = 32;
+ *recnb = 16;
return;
}

@@ -127,7 +127,7 @@ void magma_get_zgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_
void magma_get_cgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb)
{
*nb = 128;
- *recnb = 32;
+ *recnb = 16;
return;
}

@@ -135,7 +135,7 @@ void magma_get_cgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_
void magma_get_dgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb)
{
*nb = 128;
- *recnb = 32;
+ *recnb = 16;
return;
}

@@ -143,7 +143,7 @@ void magma_get_dgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_
void magma_get_sgetrf_batched_nbparam(magma_int_t n, magma_int_t *nb, magma_int_t *recnb)
{
*nb = 128;
- *recnb = 32;
+ *recnb = 16;
return;
}

@ -1,15 +0,0 @@
diff --git a/src/zgetrf_batched.cpp b/src/zgetrf_batched.cpp
index 24a65a90..884d9352 100644
--- a/src/zgetrf_batched.cpp
+++ b/src/zgetrf_batched.cpp
@@ -116,7 +116,9 @@ magma_zgetrf_batched(
return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue );
}
else{
- return magma_zgetrf_batched_smallsq_shfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue );
+ // magma_cgetrf_batched_smallsq_shfl is broken, therefore let's call noshfl version for arch < 700
+ // return magma_zgetrf_batched_smallsq_shfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue );
+ return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue );
}
#else
return magma_zgetrf_batched_smallsq_noshfl( m, dA_array, ldda, ipiv_array, info_array, batchCount, queue );
@ -1 +0,0 @@
6cd83808c6e8bc7a44028e05112b3ab4e579bcc73202ed14733f66661127e213 magma-2.6.1.tar.gz
@ -1,20 +0,0 @@
--- control/thread_queue.cpp 2016-08-30 06:37:49.000000000 -0700
+++ control/thread_queue.cpp 2016-10-10 19:47:28.911580965 -0700
@@ -15,7 +15,7 @@
{
if ( err != 0 ) {
fprintf( stderr, "Error: %s (%d)\n", strerror(err), err );
- throw std::exception();
+ // throw std::exception();
}
}

@@ -172,7 +172,7 @@
check( pthread_mutex_lock( &mutex ));
if ( quit_flag ) {
fprintf( stderr, "Error: push_task() called after quit()\n" );
- throw std::exception();
+ // throw std::exception();
}
q.push( task );
ntask += 1;
@ -1,21 +0,0 @@
The MIT License (MIT)

Copyright (c) 2016 manylinux

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
@ -1,28 +0,0 @@
#!/usr/bin/env bash

set -ex

SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

case "${GPU_ARCH_TYPE:-BLANK}" in
    BLANK)
        # Legacy behavior for CircleCI
        bash "${SCRIPTPATH}/build_cuda.sh"
        ;;
    cuda)
        bash "${SCRIPTPATH}/build_cuda.sh"
        ;;
    rocm)
        bash "${SCRIPTPATH}/build_rocm.sh"
        ;;
    cpu | cpu-cxx11-abi | cpu-s390x)
        bash "${SCRIPTPATH}/build_cpu.sh"
        ;;
    xpu)
        bash "${SCRIPTPATH}/build_xpu.sh"
        ;;
    *)
        echo "Unrecognized GPU_ARCH_TYPE '${GPU_ARCH_TYPE}', exiting..."
        exit 1
        ;;
esac
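# Example invocation (a sketch; the variable values are hypothetical):
#   GPU_ARCH_TYPE=cuda GPU_ARCH_VERSION=12.4 bash build.sh
# dispatches to build_cuda.sh, while an unset GPU_ARCH_TYPE falls through to
# the legacy CircleCI branch, which also builds for CUDA.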
@ -1,482 +0,0 @@
#!/usr/bin/env bash
# meant to be called only from the neighboring build.sh and build_cpu.sh scripts

set -ex
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"

source ${SOURCE_DIR}/set_desired_python.sh


if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
    echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
    echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
    exit 1
fi

# Function to retry functions that sometimes timeout or have flaky failures
retry () {
    $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}
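# retry re-runs its entire argument list with a doubling backoff (1s, 2s, 4s,
# 8s), so a command gets up to five attempts. An illustrative use (URL is
# hypothetical):
#   retry curl -fsSL -O https://example.com/artifact.tar.gz
# Because it expands $* unquoted on each attempt, it is meant for simple
# flaky commands like the package installs below, not for arguments that
# contain spaces.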

# TODO move this into the Docker images
OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
    retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    retry dnf install -q -y zip openssl
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    # TODO: Remove this once nvidia package repos are back online
    # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
    # shellcheck disable=SC2046
    sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")

    retry apt-get update
    retry apt-get -y install zip openssl
fi

# We use the package name to test the package by passing this to 'pip install'
# This is the env variable that setup.py uses to name the package. Note that
# pip 'normalizes' the name first by changing all - to _
if [[ -z "$TORCH_PACKAGE_NAME" ]]; then
    TORCH_PACKAGE_NAME='torch'
fi

if [[ -z "$TORCH_NO_PYTHON_PACKAGE_NAME" ]]; then
    TORCH_NO_PYTHON_PACKAGE_NAME='torch_no_python'
fi

TORCH_PACKAGE_NAME="$(echo $TORCH_PACKAGE_NAME | tr '-' '_')"
TORCH_NO_PYTHON_PACKAGE_NAME="$(echo $TORCH_NO_PYTHON_PACKAGE_NAME | tr '-' '_')"
echo "Expecting the built wheels to all be called '$TORCH_PACKAGE_NAME' or '$TORCH_NO_PYTHON_PACKAGE_NAME'"

# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
# PYTORCH_BUILD_NUMBER > 1
build_version="$PYTORCH_BUILD_VERSION"
build_number="$PYTORCH_BUILD_NUMBER"
if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
    # This will be the *exact* version, since build_number<1
    build_version="$OVERRIDE_PACKAGE_VERSION"
    build_number=0
fi
if [[ -z "$build_version" ]]; then
    build_version=1.0.0
fi
if [[ -z "$build_number" ]]; then
    build_number=1
fi
export PYTORCH_BUILD_VERSION=$build_version
export PYTORCH_BUILD_NUMBER=$build_number
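# Worked example of the versioning rules above (values hypothetical):
#   PYTORCH_BUILD_VERSION=2.1.0 PYTORCH_BUILD_NUMBER=3 -> wheel version 2.1.0.post3
#   PYTORCH_BUILD_VERSION=2.1.0 PYTORCH_BUILD_NUMBER=1 -> wheel version 2.1.0
#   OVERRIDE_PACKAGE_VERSION=2.1.0+cu121               -> exactly 2.1.0+cu121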

export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"

if [[ -e /opt/openssl ]]; then
    export OPENSSL_ROOT_DIR=/opt/openssl
    export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH
fi



mkdir -p /tmp/$WHEELHOUSE_DIR

export PATCHELF_BIN=/usr/local/bin/patchelf
patchelf_version=$($PATCHELF_BIN --version)
echo "patchelf version: " $patchelf_version
if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
    echo "Your patchelf version is too old. Please use version >= 0.10."
    exit 1
fi

########################################################
# Compile wheels as well as libtorch
#######################################################
if [[ -z "$PYTORCH_ROOT" ]]; then
    echo "Need to set PYTORCH_ROOT env variable"
    exit 1
fi
pushd "$PYTORCH_ROOT"
python setup.py clean
retry pip install -qr requirements.txt
case ${DESIRED_PYTHON} in
    cp31*)
        retry pip install -q --pre numpy==2.1.0
        ;;
    # Should catch 3.9+
    *)
        retry pip install -q --pre numpy==2.0.2
        ;;
esac

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    export _GLIBCXX_USE_CXX11_ABI=1
else
    export _GLIBCXX_USE_CXX11_ABI=0
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
    echo "Calling build_amd.py at $(date)"
    python tools/amd_build/build_amd.py
fi

# This value comes from binary_linux_build.sh (and should only be set to true
# for master / release branches)
BUILD_DEBUG_INFO=${BUILD_DEBUG_INFO:=0}

if [[ $BUILD_DEBUG_INFO == "1" ]]; then
    echo "Building wheel and debug info"
else
    echo "BUILD_DEBUG_INFO was not set, skipping debug info"
fi

if [[ "$DISABLE_RCCL" = 1 ]]; then
    echo "Disabling NCCL/RCCL in PyTorch"
    USE_RCCL=0
    USE_NCCL=0
    USE_KINETO=0
else
    USE_RCCL=1
    USE_NCCL=1
    USE_KINETO=1
fi

echo "Calling setup.py bdist at $(date)"

if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
    echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
    time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
        BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \
        BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
        USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
        python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
    echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
    echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
    time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
        BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \
        BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
        USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
        python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR --cmake
    echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
else
    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
        EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
        BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
        USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
        python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
fi
echo "Finished setup.py bdist at $(date)"

# Build libtorch packages
if [[ -n "$BUILD_PYTHONLESS" ]]; then
    # Now build pythonless libtorch
    # Note - just use whichever python we happen to be on
    python setup.py clean

    if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
        STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
    fi

    mkdir -p build
    pushd build
    echo "Calling tools/build_libtorch.py at $(date)"
    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
        EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
        python ../tools/build_libtorch.py
    echo "Finished tools/build_libtorch.py at $(date)"
    popd

    mkdir -p libtorch/{lib,bin,include,share}
    cp -r build/build/lib libtorch/

    # for now, the headers for the libtorch package will just be copied in
    # from one of the wheels (this is from when this script built multiple
    # wheels at once)
    ANY_WHEEL=$(ls /tmp/$WHEELHOUSE_DIR/torch*.whl | head -n1)
    unzip -d any_wheel $ANY_WHEEL
    if [[ -d any_wheel/torch/include ]]; then
        cp -r any_wheel/torch/include libtorch/
    else
        cp -r any_wheel/torch/lib/include libtorch/
    fi
    cp -r any_wheel/torch/share/cmake libtorch/share/
    rm -rf any_wheel

    echo $PYTORCH_BUILD_VERSION > libtorch/build-version
    echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash

    mkdir -p /tmp/$LIBTORCH_HOUSE_DIR

    if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
        LIBTORCH_ABI="cxx11-abi-"
    else
        LIBTORCH_ABI=
    fi

    zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
    cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
        /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
fi

popd

#######################################################################
# ADD DEPENDENCIES INTO THE WHEEL
#
# auditwheel repair doesn't work correctly and is buggy
# so manually do the work of copying dependency libs and patchelfing
# and fixing RECORDS entries correctly
######################################################################

fname_with_sha256() {
    HASH=$(sha256sum $1 | cut -c1-8)
    DIRNAME=$(dirname $1)
    BASENAME=$(basename $1)
    # Do not rename nvrtc-builtins.so as they are dynamically loaded
    # by libnvrtc.so
    # Similarly don't mangle libcudnn and libcublas library names
    if [[ $BASENAME == "libnvrtc-builtins.s"* || $BASENAME == "libcudnn"* || $BASENAME == "libcublas"* ]]; then
        echo $1
    else
        INITNAME=$(echo $BASENAME | cut -f1 -d".")
        ENDNAME=$(echo $BASENAME | cut -f 2- -d".")
        echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME"
    fi
}

fname_without_so_number() {
    LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
    echo "$LINKNAME"
}
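# Illustrative behavior of the two helpers above (hashes hypothetical):
#   fname_with_sha256 torch/lib/libshm.so.1        -> torch/lib/libshm-3f2a9b10.so.1
#   fname_with_sha256 torch/lib/libcudnn.so.9      -> torch/lib/libcudnn.so.9 (excluded from mangling)
#   fname_without_so_number torch/lib/libfoo.so.1.2 -> torch/lib/libfoo.so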

make_wheel_record() {
    FPATH=$1
    if echo $FPATH | grep RECORD >/dev/null 2>&1; then
        # the RECORD file itself is listed with empty hash and size fields
        echo "$FPATH,,"
    else
        HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
        FSIZE=$(ls -nl $FPATH | awk '{print $5}')
        echo "$FPATH,sha256=$HASH,$FSIZE"
    fi
}
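# The wheel RECORD format (PEP 376/427) is one CSV row per file:
#   path,sha256=<urlsafe-base64 digest without padding>,<size in bytes>
# e.g. (digest hypothetical):
#   torch/_C.cpython-310-x86_64-linux-gnu.so,sha256=AbCdEf0123,123456
# which is why make_wheel_record special-cases the RECORD file above.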

replace_needed_sofiles() {
    find $1 -name '*.so*' | while read sofile; do
        origname=$2
        patchedname=$3
        if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
            set +e
            origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
            ERRCODE=$?
            set -e
            if [ "$ERRCODE" -eq "0" ]; then
                echo "patching $sofile entry $origname to $patchedname"
                $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
            fi
        fi
    done
}
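# replace_needed_sofiles rewrites DT_NEEDED entries so that consumers of a
# renamed dependency keep resolving, roughly (names hypothetical):
#   before: libtorch_cuda.so NEEDED libcudart.so.12
#   after:  libtorch_cuda.so NEEDED libcudart-5ad3c8f2.so.12
# The grep step matches the versioned entry actually present in the ELF
# header, since the SONAME passed in may only be a prefix of it.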

echo 'Built this wheel:'
ls /tmp/$WHEELHOUSE_DIR
mkdir -p "/$WHEELHOUSE_DIR"
mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/

if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
    mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true
fi

if [[ -n "$BUILD_PYTHONLESS" ]]; then
    mkdir -p /$LIBTORCH_HOUSE_DIR
    mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
    rm -rf /tmp/$LIBTORCH_HOUSE_DIR
fi
rm -rf /tmp/$WHEELHOUSE_DIR
rm -rf /tmp_dir
mkdir /tmp_dir
pushd /tmp_dir

for pkg in /$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/torch*linux*.whl /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do

    # if the glob didn't match anything
    if [[ ! -e $pkg ]]; then
        continue
    fi

    rm -rf tmp
    mkdir -p tmp
    cd tmp
    cp $pkg .

    unzip -q $(basename $pkg)
    rm -f $(basename $pkg)

    if [[ -d torch ]]; then
        PREFIX=torch
    else
        PREFIX=libtorch
    fi

    if [[ $pkg != *"without-deps"* ]]; then
        # copy needed dependent .so files over and tag them with their hash
        patched=()
        for filepath in "${DEPS_LIST[@]}"; do
            filename=$(basename $filepath)
            destpath=$PREFIX/lib/$filename
            if [[ "$filepath" != "$destpath" ]]; then
                cp $filepath $destpath
            fi

            # ROCm workaround for roctracer dlopens
            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                patchedpath=$(fname_without_so_number $destpath)
            # Keep the so number for XPU dependencies
            elif [[ "$DESIRED_CUDA" == *"xpu"* ]]; then
                patchedpath=$destpath
            else
                patchedpath=$(fname_with_sha256 $destpath)
            fi
            patchedname=$(basename $patchedpath)
            if [[ "$destpath" != "$patchedpath" ]]; then
                mv $destpath $patchedpath
            fi
            patched+=("$patchedname")
            echo "Copied $filepath to $patchedpath"
        done

        echo "patching to fix the so names to the hashed names"
        for ((i=0;i<${#DEPS_LIST[@]};++i)); do
            replace_needed_sofiles $PREFIX ${DEPS_SONAME[i]} ${patched[i]}
            # do the same for caffe2, if it exists
            if [[ -d caffe2 ]]; then
                replace_needed_sofiles caffe2 ${DEPS_SONAME[i]} ${patched[i]}
            fi
        done

        # copy over needed auxiliary files
        for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
            srcpath=${DEPS_AUX_SRCLIST[i]}
            dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
            mkdir -p $(dirname $dstpath)
            cp $srcpath $dstpath
        done
    fi

    # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
    find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'}"
        $PATCHELF_BIN --set-rpath ${C_SO_RPATH:-'$ORIGIN:$ORIGIN/lib'} ${FORCE_RPATH:-} $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done

    # set RPATH of lib/ files to $ORIGIN
    find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to ${LIB_SO_RPATH:-'$ORIGIN'}"
        $PATCHELF_BIN --set-rpath ${LIB_SO_RPATH:-'$ORIGIN'} ${FORCE_RPATH:-} $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done
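    # With these rpaths, the dynamic linker expands $ORIGIN to the directory
    # containing the loading .so, so torch/_C.*.so finds its dependencies in
    # torch/lib without any LD_LIBRARY_PATH. One way to verify on an unpacked
    # wheel (path hypothetical):
    #   $PATCHELF_BIN --print-rpath torch/lib/libtorch_cpu.so   # -> $ORIGIN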

    # regenerate the RECORD file with new hashes
    record_file=$(echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g')
    if [[ -e $record_file ]]; then
        echo "Generating new record file $record_file"
        : > "$record_file"
        # generate records for folders in wheel
        find * -type f | while read fname; do
            make_wheel_record "$fname" >>"$record_file"
        done
    fi

    if [[ $BUILD_DEBUG_INFO == "1" ]]; then
        pushd "$PREFIX/lib"

        # Duplicate library into debug lib
        cp libtorch_cpu.so libtorch_cpu.so.dbg

        # Keep debug symbols on debug lib
        strip --only-keep-debug libtorch_cpu.so.dbg

        # Remove debug info from release lib
        strip --strip-debug libtorch_cpu.so

        objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg

        # Zip up debug info
        mkdir -p /tmp/debug
        mv libtorch_cpu.so.dbg /tmp/debug/libtorch_cpu.so.dbg
        CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch_cpu.so)
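        # A note on the line above: the .gnu_debuglink section written by
        # objcopy stores the debug file name followed by a CRC32 of the debug
        # file in its last four bytes; --dump-section pipes the section into a
        # process substitution that tails those four bytes and prints them as
        # hex, which then tags the debug archive name below.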

        pushd /tmp
        PKG_NAME=$(basename "$pkg" | sed 's/\.whl$//g')
        zip /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip /tmp/debug/libtorch_cpu.so.dbg
        cp /tmp/debug-whl-libtorch-"$PKG_NAME"-"$CRC32".zip "$PYTORCH_FINAL_PACKAGE_DIR"
        popd

        popd
    fi

    # zip up the wheel back
    zip -rq $(basename $pkg) $PREFIX*

    # replace original wheel
    rm -f $pkg
    mv $(basename $pkg) $pkg
    cd ..
    rm -rf tmp
done

# Copy wheels to host machine for persistence before testing
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true
    if [[ -n "$BUILD_PYTHONLESS" ]]; then
        cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
    else
        cp /$WHEELHOUSE_DIR/torch*.whl "$PYTORCH_FINAL_PACKAGE_DIR"
    fi
fi

# remove stuff before testing
rm -rf /opt/rh
if ls /usr/local/cuda* >/dev/null 2>&1; then
    rm -rf /usr/local/cuda*
fi


# Test that all the wheels work
if [[ -z "$BUILD_PYTHONLESS" ]]; then
    export OMP_NUM_THREADS=4 # on NUMA machines this takes too long
    pushd $PYTORCH_ROOT/test

    # Install the wheel for this Python version
    if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
        pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true
    fi

    pip uninstall -y "$TORCH_PACKAGE_NAME"

    if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
        pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
    fi

    pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v

    # Print info on the libraries installed in this wheel
    # Since this is only for reporting purposes, we add || true to the ldd
    # call rather than teaching the find command to skip non-library files
    # with an embedded *.so* in their name.
    installed_libraries=($(find "$pydir/lib/python${py_majmin}/site-packages/torch/" -name '*.so*'))
    echo "The wheel installed all of the libraries: ${installed_libraries[@]}"
    for installed_lib in "${installed_libraries[@]}"; do
        ldd "$installed_lib" || true
    done

    # Run the tests
    echo "$(date) :: Running tests"
    pushd "$PYTORCH_ROOT"

    #TODO: run_tests.sh and check_binary.sh should be moved to pytorch/pytorch project
    LD_LIBRARY_PATH=/usr/local/nvidia/lib64 \
        "/builder/run_tests.sh" manywheel "${py_majmin}" "$DESIRED_CUDA"
    popd
    echo "$(date) :: Finished tests"
fi
@ -1,60 +0,0 @@
#!/usr/bin/env bash

set -ex

export TH_BINARY_BUILD=1
export USE_CUDA=0

# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build()
    CMAKE_ARGS=()
fi
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
    EXTRA_CAFFE2_CMAKE_FLAGS=()
fi

WHEELHOUSE_DIR="wheelhousecpu"
LIBTORCH_HOUSE_DIR="libtorch_housecpu"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    if [[ -z "$BUILD_PYTHONLESS" ]]; then
        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousecpu"
    else
        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housecpu"
    fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    if [[ "$(uname -m)" == "s390x" ]]; then
        LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
    else
        LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
    fi
fi

DEPS_LIST=(
    "$LIBGOMP_PATH"
)

DEPS_SONAME=(
    "libgomp.so.1"
)

rm -rf /usr/local/cuda*

SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
if [[ -z "$BUILD_PYTHONLESS" ]]; then
    BUILD_SCRIPT=build_common.sh
else
    BUILD_SCRIPT=build_libtorch.sh
fi
source ${SOURCE_DIR}/${BUILD_SCRIPT}
@ -1,299 +0,0 @@
#!/usr/bin/env bash

set -ex

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"

export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
export NCCL_ROOT_DIR=/usr/local/cuda
export TH_BINARY_BUILD=1
export USE_STATIC_CUDNN=1
export USE_STATIC_NCCL=1
export ATEN_STATIC_CUDA=1
export USE_CUDA_STATIC_LINK=1
export INSTALL_TEST=0 # don't install test binaries into site-packages
export USE_CUPTI_SO=0
export USE_CUSPARSELT=${USE_CUSPARSELT:-1} # Enable if not disabled by libtorch build

# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build()
    CMAKE_ARGS=()
fi
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
    EXTRA_CAFFE2_CMAKE_FLAGS=()
fi

# Determine CUDA version and architectures to build for
#
# NOTE: We should first check `DESIRED_CUDA` when determining `CUDA_VERSION`,
# because in some cases a single Docker image can have multiple CUDA versions
# on it, and `nvcc --version` might not show the CUDA version we want.
if [[ -n "$DESIRED_CUDA" ]]; then
    # If the DESIRED_CUDA already matches the format that we expect
    if [[ ${DESIRED_CUDA} =~ ^[0-9]+\.[0-9]+$ ]]; then
        CUDA_VERSION=${DESIRED_CUDA}
    else
        # cu90, cu92, cu100, cu101
        if [[ ${#DESIRED_CUDA} -eq 4 ]]; then
            CUDA_VERSION="${DESIRED_CUDA:2:1}.${DESIRED_CUDA:3:1}"
        elif [[ ${#DESIRED_CUDA} -eq 5 ]]; then
            CUDA_VERSION="${DESIRED_CUDA:2:2}.${DESIRED_CUDA:4:1}"
        fi
    fi
    echo "Using CUDA $CUDA_VERSION as determined by DESIRED_CUDA"
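    # Worked examples of the substring parsing above:
    #   DESIRED_CUDA=cu92  (4 chars) -> CUDA_VERSION=9.2
    #   DESIRED_CUDA=cu118 (5 chars) -> CUDA_VERSION=11.8
    #   DESIRED_CUDA=12.4            -> CUDA_VERSION=12.4 (already dotted)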

    # There really has to be a better way to do this - eli
    # Possibly limiting builds to specific CUDA versions by delimiting images would be a choice
    if [[ "$OS_NAME" == *"Ubuntu"* ]]; then
        echo "Switching to CUDA version ${DESIRED_CUDA}"
        /builder/conda/switch_cuda_version.sh "${DESIRED_CUDA}"
    fi
else
    CUDA_VERSION=$(nvcc --version|grep release|cut -f5 -d" "|cut -f1 -d",")
    echo "CUDA $CUDA_VERSION Detected"
fi

cuda_version_nodot=$(echo $CUDA_VERSION | tr -d '.')

TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6"
case ${CUDA_VERSION} in
    12.6)
        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
            TORCH_CUDA_ARCH_LIST="9.0"
        else
            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0+PTX"
        fi
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    12.4)
        if [[ "$GPU_ARCH_TYPE" = "cuda-aarch64" ]]; then
            TORCH_CUDA_ARCH_LIST="9.0"
        else
            TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
        fi
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    12.1)
        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};9.0"
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    11.8)
        TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST};3.7;9.0"
        EXTRA_CAFFE2_CMAKE_FLAGS+=("-DATEN_NO_TEST=ON")
        ;;
    *)
        echo "unknown cuda version $CUDA_VERSION"
        exit 1
        ;;
esac

export TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
echo "${TORCH_CUDA_ARCH_LIST}"

# Package directories
WHEELHOUSE_DIR="wheelhouse$cuda_version_nodot"
LIBTORCH_HOUSE_DIR="libtorch_house$cuda_version_nodot"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    if [[ -z "$BUILD_PYTHONLESS" ]]; then
        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$cuda_version_nodot"
    else
        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$cuda_version_nodot"
    fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
fi

DEPS_LIST=(
    "$LIBGOMP_PATH"
)
DEPS_SONAME=(
    "libgomp.so.1"
)

# CUDA 11.8 has to ship the libcusparseLt.so.0 with the binary
# since nvidia-cusparselt-cu11 is not available in PyPI
if [[ $USE_CUSPARSELT == "1" && $CUDA_VERSION == "11.8" ]]; then
    DEPS_SONAME+=(
        "libcusparseLt.so.0"
    )
    DEPS_LIST+=(
        "/usr/local/cuda/lib64/libcusparseLt.so.0"
    )
fi

if [[ $CUDA_VERSION == "12.4" || $CUDA_VERSION == "12.6" ]]; then
    export USE_STATIC_CUDNN=0
    # Try parallelizing nvcc as well
    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"

    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
        echo "Bundling with cudnn and cublas."
        DEPS_LIST+=(
            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
            "/usr/local/cuda/lib64/libcudnn.so.9"
            "/usr/local/cuda/lib64/libcublas.so.12"
            "/usr/local/cuda/lib64/libcublasLt.so.12"
            "/usr/local/cuda/lib64/libcusparseLt.so.0"
            "/usr/local/cuda/lib64/libcudart.so.12"
            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
            "/usr/local/cuda/lib64/libnvrtc.so.12"
            "/usr/local/cuda/lib64/libnvrtc-builtins.so"
        )
        DEPS_SONAME+=(
            "libcudnn_adv.so.9"
            "libcudnn_cnn.so.9"
            "libcudnn_graph.so.9"
            "libcudnn_ops.so.9"
            "libcudnn_engines_runtime_compiled.so.9"
            "libcudnn_engines_precompiled.so.9"
            "libcudnn_heuristic.so.9"
            "libcudnn.so.9"
            "libcublas.so.12"
            "libcublasLt.so.12"
            "libcusparseLt.so.0"
            "libcudart.so.12"
            "libnvToolsExt.so.1"
            "libnvrtc.so.12"
            "libnvrtc-builtins.so"
        )
    else
        echo "Using nvidia libs from pypi."
        CUDA_RPATHS=(
            '$ORIGIN/../../nvidia/cublas/lib'
            '$ORIGIN/../../nvidia/cuda_cupti/lib'
            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
            '$ORIGIN/../../nvidia/cuda_runtime/lib'
            '$ORIGIN/../../nvidia/cudnn/lib'
            '$ORIGIN/../../nvidia/cufft/lib'
            '$ORIGIN/../../nvidia/curand/lib'
            '$ORIGIN/../../nvidia/cusolver/lib'
            '$ORIGIN/../../nvidia/cusparse/lib'
            '$ORIGIN/../../cusparselt/lib'
            '$ORIGIN/../../nvidia/nccl/lib'
            '$ORIGIN/../../nvidia/nvtx/lib'
        )
        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
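        # The subshell above joins the array with ':' via IFS, e.g. (abbreviated):
        #   ('$ORIGIN/../../nvidia/cublas/lib' '$ORIGIN/../../nvidia/cudnn/lib')
        #   -> '$ORIGIN/../../nvidia/cublas/lib:$ORIGIN/../../nvidia/cudnn/lib'
        # which is then prepended to the rpath of the wheel's shared objects.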
        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
        export FORCE_RPATH="--force-rpath"
        export USE_STATIC_NCCL=0
        export USE_SYSTEM_NCCL=1
        export ATEN_STATIC_CUDA=0
        export USE_CUDA_STATIC_LINK=0
        export USE_CUPTI_SO=1
        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
    fi
elif [[ $CUDA_VERSION == "11.8" ]]; then
    export USE_STATIC_CUDNN=0
    # Try parallelizing nvcc as well
    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
    # Bundle ptxas into the wheel, see https://github.com/pytorch/pytorch/pull/119750
    export BUILD_BUNDLE_PTXAS=1

    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
        echo "Bundling with cudnn and cublas."
        DEPS_LIST+=(
            "/usr/local/cuda/lib64/libcudnn_adv.so.9"
            "/usr/local/cuda/lib64/libcudnn_cnn.so.9"
            "/usr/local/cuda/lib64/libcudnn_graph.so.9"
            "/usr/local/cuda/lib64/libcudnn_ops.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
            "/usr/local/cuda/lib64/libcudnn.so.9"
            "/usr/local/cuda/lib64/libcublas.so.11"
            "/usr/local/cuda/lib64/libcublasLt.so.11"
            "/usr/local/cuda/lib64/libcudart.so.11.0"
            "/usr/local/cuda/lib64/libnvToolsExt.so.1"
            "/usr/local/cuda/lib64/libnvrtc.so.11.2" # this is not a mistake, it links to more specific cuda version
            "/usr/local/cuda/lib64/libnvrtc-builtins.so.11.8"
        )
        DEPS_SONAME+=(
            "libcudnn_adv.so.9"
            "libcudnn_cnn.so.9"
            "libcudnn_graph.so.9"
            "libcudnn_ops.so.9"
            "libcudnn_engines_runtime_compiled.so.9"
            "libcudnn_engines_precompiled.so.9"
            "libcudnn_heuristic.so.9"
            "libcudnn.so.9"
            "libcublas.so.11"
            "libcublasLt.so.11"
            "libcudart.so.11.0"
            "libnvToolsExt.so.1"
            "libnvrtc.so.11.2"
            "libnvrtc-builtins.so.11.8"
        )
    else
        echo "Using nvidia libs from pypi."
        CUDA_RPATHS=(
            '$ORIGIN/../../nvidia/cublas/lib'
            '$ORIGIN/../../nvidia/cuda_cupti/lib'
            '$ORIGIN/../../nvidia/cuda_nvrtc/lib'
            '$ORIGIN/../../nvidia/cuda_runtime/lib'
            '$ORIGIN/../../nvidia/cudnn/lib'
            '$ORIGIN/../../nvidia/cufft/lib'
            '$ORIGIN/../../nvidia/curand/lib'
            '$ORIGIN/../../nvidia/cusolver/lib'
            '$ORIGIN/../../nvidia/cusparse/lib'
            '$ORIGIN/../../nvidia/nccl/lib'
            '$ORIGIN/../../nvidia/nvtx/lib'
        )
        CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
        export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
        export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
        export FORCE_RPATH="--force-rpath"
        export USE_STATIC_NCCL=0
        export USE_SYSTEM_NCCL=1
        export ATEN_STATIC_CUDA=0
        export USE_CUDA_STATIC_LINK=0
        export USE_CUPTI_SO=1
        export NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
        export NCCL_LIB_DIR="/usr/local/cuda/lib64/"
    fi
else
    echo "Unknown cuda version $CUDA_VERSION"
    exit 1
fi

# builder/test.sh requires DESIRED_CUDA to know what tests to exclude
export DESIRED_CUDA="$cuda_version_nodot"

# Switch `/usr/local/cuda` to the desired CUDA version
rm -rf /usr/local/cuda || true
ln -s "/usr/local/cuda-${CUDA_VERSION}" /usr/local/cuda

# Switch `/usr/local/magma` to the desired CUDA version
rm -rf /usr/local/magma || true
ln -s /usr/local/cuda-${CUDA_VERSION}/magma /usr/local/magma

export CUDA_VERSION=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev) # 10.0.130
export CUDA_VERSION_SHORT=$(ls /usr/local/cuda/lib64/libcudart.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev | cut -f1,2 -d".") # 10.0
export CUDNN_VERSION=$(ls /usr/local/cuda/lib64/libcudnn.so.*|sort|tac | head -1 | rev | cut -d"." -f -3 | rev)

SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
if [[ -z "$BUILD_PYTHONLESS" ]]; then
    BUILD_SCRIPT=build_common.sh
else
    BUILD_SCRIPT=build_libtorch.sh
fi
source $SCRIPTPATH/${BUILD_SCRIPT}
@ -1,353 +0,0 @@
#!/usr/bin/env bash
# meant to be called only from the neighboring build.sh and build_cpu.sh scripts

set -e -o pipefail
SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"

# Require only one python installation
if [[ -z "$DESIRED_PYTHON" ]]; then
    echo "Need to set DESIRED_PYTHON env variable"
    exit 1
fi
if [[ -n "$BUILD_PYTHONLESS" && -z "$LIBTORCH_VARIANT" ]]; then
    echo "BUILD_PYTHONLESS is set, so need LIBTORCH_VARIANT to also be set"
    echo "LIBTORCH_VARIANT should be one of shared-with-deps shared-without-deps static-with-deps static-without-deps"
    exit 1
fi

# Function to retry functions that sometimes timeout or have flaky failures
retry () {
    $* || (sleep 1 && $*) || (sleep 2 && $*) || (sleep 4 && $*) || (sleep 8 && $*)
}

# TODO move this into the Docker images
OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
    retry yum install -q -y zip openssl
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
    retry dnf install -q -y zip openssl
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    # TODO: Remove this once nvidia package repos are back online
    # Comment out nvidia repositories to prevent them from getting apt-get updated, see https://github.com/pytorch/pytorch/issues/74968
    # shellcheck disable=SC2046
    sed -i 's/.*nvidia.*/# &/' $(find /etc/apt/ -type f -name "*.list")
    retry apt-get update
    retry apt-get -y install zip openssl
fi

# Version: setup.py uses $PYTORCH_BUILD_VERSION.post$PYTORCH_BUILD_NUMBER if
# PYTORCH_BUILD_NUMBER > 1
build_version="$PYTORCH_BUILD_VERSION"
build_number="$PYTORCH_BUILD_NUMBER"
if [[ -n "$OVERRIDE_PACKAGE_VERSION" ]]; then
    # This will be the *exact* version, since build_number<1
    build_version="$OVERRIDE_PACKAGE_VERSION"
    build_number=0
fi
if [[ -z "$build_version" ]]; then
    build_version=1.0.0
fi
if [[ -z "$build_number" ]]; then
    build_number=1
fi
export PYTORCH_BUILD_VERSION=$build_version
export PYTORCH_BUILD_NUMBER=$build_number

export CMAKE_LIBRARY_PATH="/opt/intel/lib:/lib:$CMAKE_LIBRARY_PATH"
export CMAKE_INCLUDE_PATH="/opt/intel/include:$CMAKE_INCLUDE_PATH"

# set OPENSSL_ROOT_DIR=/opt/openssl if it exists
if [[ -e /opt/openssl ]]; then
    export OPENSSL_ROOT_DIR=/opt/openssl
    export CMAKE_INCLUDE_PATH="/opt/openssl/include":$CMAKE_INCLUDE_PATH
fi

# If given a python version like 3.6m or 2.7mu, convert this to the format we
# expect. The binary CI jobs pass in python versions like this; they also only
# ever pass one python version, so we assume that DESIRED_PYTHON is not a list
# in this case
if [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
    python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
    DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
fi
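# Worked example of the normalization above:
#   DESIRED_PYTHON=3.10 -> python_nodot=310 -> DESIRED_PYTHON=cp310-cp310
# matching the manylinux interpreter layout under /opt/python/.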
pydir="/opt/python/$DESIRED_PYTHON"
export PATH="$pydir/bin:$PATH"

export PATCHELF_BIN=/usr/local/bin/patchelf
patchelf_version=`$PATCHELF_BIN --version`
echo "patchelf version: " $patchelf_version
if [[ "$patchelf_version" == "patchelf 0.9" ]]; then
    echo "Your patchelf version is too old. Please use version >= 0.10."
    exit 1
fi

########################################################
# Compile wheels as well as libtorch
#######################################################
if [[ -z "$PYTORCH_ROOT" ]]; then
    echo "Need to set PYTORCH_ROOT env variable"
    exit 1
fi
pushd "$PYTORCH_ROOT"
python setup.py clean
retry pip install -qr requirements.txt
retry pip install -q numpy==2.0.1

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    export _GLIBCXX_USE_CXX11_ABI=1
else
    export _GLIBCXX_USE_CXX11_ABI=0
fi

if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
    echo "Calling build_amd.py at $(date)"
    python tools/amd_build/build_amd.py
    # TODO remove this work-around once pytorch sources are updated
    export ROCclr_DIR=/opt/rocm/rocclr/lib/cmake/rocclr
fi

echo "Calling setup.py install at $(date)"

if [[ $LIBTORCH_VARIANT = *"static"* ]]; then
    STATIC_CMAKE_FLAG="-DTORCH_STATIC=1"
fi

(
    set -x

    mkdir -p build

    # TODO: Remove the CFLAGS below once https://github.com/pytorch/pytorch/issues/55952 is closed
    time CMAKE_ARGS=${CMAKE_ARGS[@]} \
        EXTRA_CAFFE2_CMAKE_FLAGS="${EXTRA_CAFFE2_CMAKE_FLAGS[@]} $STATIC_CMAKE_FLAG" \
        CFLAGS='-Wno-deprecated-declarations' \
        BUILD_LIBTORCH_CPU_WITH_DEBUG=1 \
        python setup.py install

    mkdir -p libtorch/{lib,bin,include,share}

    # Make debug folder separate so it doesn't get zipped up with the rest of
    # libtorch
    mkdir debug

    # Copy over all lib files
    cp -rv build/lib/* libtorch/lib/
    cp -rv build/lib*/torch/lib/* libtorch/lib/

    # Copy over all include files
    cp -rv build/include/* libtorch/include/
    cp -rv build/lib*/torch/include/* libtorch/include/

    # Copy over all of the cmake files
    cp -rv build/lib*/torch/share/* libtorch/share/

    # Split libtorch into debug / release version
    cp libtorch/lib/libtorch_cpu.so libtorch/lib/libtorch_cpu.so.dbg

    # Keep debug symbols on debug lib
    strip --only-keep-debug libtorch/lib/libtorch_cpu.so.dbg

    # Remove debug info from release lib
    strip --strip-debug libtorch/lib/libtorch_cpu.so

    # Add a debug link to the release lib to the debug lib (debuggers will then
    # search for symbols in a file called libtorch_cpu.so.dbg in some
    # predetermined locations) and embed a CRC32 of the debug library into the .so
    cd libtorch/lib

    objcopy libtorch_cpu.so --add-gnu-debuglink=libtorch_cpu.so.dbg
    cd ../..

    # Move the debug symbols to its own directory so it doesn't get processed /
    # zipped with all the other libraries
    mv libtorch/lib/libtorch_cpu.so.dbg debug/libtorch_cpu.so.dbg

    echo "${PYTORCH_BUILD_VERSION}" > libtorch/build-version
    echo "$(pushd $PYTORCH_ROOT && git rev-parse HEAD)" > libtorch/build-hash

)

if [[ "$DESIRED_DEVTOOLSET" == *"cxx11-abi"* ]]; then
    LIBTORCH_ABI="cxx11-abi-"
else
    LIBTORCH_ABI=
fi

(
    set -x

    mkdir -p /tmp/$LIBTORCH_HOUSE_DIR

    # objcopy installed a CRC32 into libtorch_cpu.so above, so add that to the name here
    CRC32=$(objcopy --dump-section .gnu_debuglink=>(tail -c4 | od -t x4 -An | xargs echo) libtorch/lib/libtorch_cpu.so)

    # Zip debug symbols
    zip /tmp/$LIBTORCH_HOUSE_DIR/debug-libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION-$CRC32.zip debug/libtorch_cpu.so.dbg

    # Zip and copy libtorch
    zip -rq /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip libtorch
    cp /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-$PYTORCH_BUILD_VERSION.zip \
        /tmp/$LIBTORCH_HOUSE_DIR/libtorch-$LIBTORCH_ABI$LIBTORCH_VARIANT-latest.zip
)


popd

#######################################################################
# ADD DEPENDENCIES INTO THE WHEEL
#
# auditwheel repair doesn't work correctly and is buggy
# so manually do the work of copying dependency libs and patchelfing
# and fixing RECORDS entries correctly
######################################################################

fname_with_sha256() {
    HASH=$(sha256sum $1 | cut -c1-8)
    DIRNAME=$(dirname $1)
    BASENAME=$(basename $1)
    if [[ $BASENAME == "libnvrtc-builtins.so" || $BASENAME == "libcudnn"* ]]; then
        echo $1
    else
        INITNAME=$(echo $BASENAME | cut -f1 -d".")
        ENDNAME=$(echo $BASENAME | cut -f 2- -d".")
        echo "$DIRNAME/$INITNAME-$HASH.$ENDNAME"
    fi
}

fname_without_so_number() {
    LINKNAME=$(echo $1 | sed -e 's/\.so.*/.so/g')
    echo "$LINKNAME"
}

make_wheel_record() {
    FPATH=$1
    if echo $FPATH | grep RECORD >/dev/null 2>&1; then
        # the RECORD file itself is listed with empty hash and size fields
        echo "$FPATH,,"
    else
        HASH=$(openssl dgst -sha256 -binary $FPATH | openssl base64 | sed -e 's/+/-/g' | sed -e 's/\//_/g' | sed -e 's/=//g')
        FSIZE=$(ls -nl $FPATH | awk '{print $5}')
        echo "$FPATH,sha256=$HASH,$FSIZE"
    fi
}

echo 'Built this package:'
(
    set -x
    mkdir -p /$LIBTORCH_HOUSE_DIR
    mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
    rm -rf /tmp/$LIBTORCH_HOUSE_DIR
)
TMP_DIR=$(mktemp -d)
trap "rm -rf ${TMP_DIR}" EXIT
pushd "${TMP_DIR}"

for pkg in /$LIBTORCH_HOUSE_DIR/libtorch*.zip; do

    # if the glob didn't match anything
    if [[ ! -e $pkg ]]; then
        continue
    fi

    rm -rf tmp
    mkdir -p tmp
    cd tmp
    cp $pkg .

    unzip -q $(basename $pkg)
    rm -f $(basename $pkg)

    PREFIX=libtorch

    if [[ $pkg != *"without-deps"* ]]; then
        # copy needed dependent .so files over and tag them with their hash
        patched=()
        for filepath in "${DEPS_LIST[@]}"; do
            filename=$(basename $filepath)
            destpath=$PREFIX/lib/$filename
            if [[ "$filepath" != "$destpath" ]]; then
                cp $filepath $destpath
            fi

            if [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                patchedpath=$(fname_without_so_number $destpath)
            else
                patchedpath=$(fname_with_sha256 $destpath)
            fi
            patchedname=$(basename $patchedpath)
            if [[ "$destpath" != "$patchedpath" ]]; then
                mv $destpath $patchedpath
            fi
            patched+=("$patchedname")
            echo "Copied $filepath to $patchedpath"
        done

        echo "patching to fix the so names to the hashed names"
        for ((i=0;i<${#DEPS_LIST[@]};++i)); do
            find $PREFIX -name '*.so*' | while read sofile; do
                origname=${DEPS_SONAME[i]}
                patchedname=${patched[i]}
                if [[ "$origname" != "$patchedname" ]] || [[ "$DESIRED_CUDA" == *"rocm"* ]]; then
                    set +e
                    origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*")
                    ERRCODE=$?
                    set -e
                    if [ "$ERRCODE" -eq "0" ]; then
                        echo "patching $sofile entry $origname to $patchedname"
                        $PATCHELF_BIN --replace-needed $origname $patchedname $sofile
                    fi
                fi
            done
        done

        # copy over needed auxiliary files
        for ((i=0;i<${#DEPS_AUX_SRCLIST[@]};++i)); do
            srcpath=${DEPS_AUX_SRCLIST[i]}
            dstpath=$PREFIX/${DEPS_AUX_DSTLIST[i]}
            mkdir -p $(dirname $dstpath)
            cp $srcpath $dstpath
        done
    fi

    # set RPATH of _C.so and similar to $ORIGIN, $ORIGIN/lib
    find $PREFIX -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to " '$ORIGIN:$ORIGIN/lib'
        $PATCHELF_BIN --set-rpath '$ORIGIN:$ORIGIN/lib' $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done

    # set RPATH of lib/ files to $ORIGIN
    find $PREFIX/lib -maxdepth 1 -type f -name "*.so*" | while read sofile; do
        echo "Setting rpath of $sofile to " '$ORIGIN'
        $PATCHELF_BIN --set-rpath '$ORIGIN' $sofile
        $PATCHELF_BIN --print-rpath $sofile
    done

    # regenerate the RECORD file with new hashes
    record_file=`echo $(basename $pkg) | sed -e 's/-cp.*$/.dist-info\/RECORD/g'`
    if [[ -e $record_file ]]; then
        echo "Generating new record file $record_file"
        rm -f $record_file
        # generate records for folders in wheel
        find * -type f | while read fname; do
            echo $(make_wheel_record $fname) >>$record_file
        done
    fi

    # zip up the wheel back
    zip -rq $(basename $pkg) $PREFIX*

    # replace original wheel
    rm -f $pkg
    mv $(basename $pkg) $pkg
    cd ..
    rm -rf tmp
done

# Copy wheels to host machine for persistence before testing
if [[ -n "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    cp /$LIBTORCH_HOUSE_DIR/libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
    cp /$LIBTORCH_HOUSE_DIR/debug-libtorch*.zip "$PYTORCH_FINAL_PACKAGE_DIR"
fi
@ -1,263 +0,0 @@
#!/usr/bin/env bash

set -ex

export ROCM_HOME=/opt/rocm
export MAGMA_HOME=$ROCM_HOME/magma
# TODO: libtorch_cpu.so is broken when building with Debug info
export BUILD_DEBUG_INFO=0

# TODO Are these all used/needed?
export TH_BINARY_BUILD=1
export USE_STATIC_CUDNN=1
export USE_STATIC_NCCL=1
export ATEN_STATIC_CUDA=1
export USE_CUDA_STATIC_LINK=1
export INSTALL_TEST=0 # don't install test binaries into site-packages
# Set RPATH instead of RUNPATH when using patchelf to avoid LD_LIBRARY_PATH override
export FORCE_RPATH="--force-rpath"

# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build()
    CMAKE_ARGS=()
fi
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
    # These are passed to tools/build_pytorch_libs.sh::build_caffe2()
    EXTRA_CAFFE2_CMAKE_FLAGS=()
fi

# Determine ROCm version and architectures to build for
#
# NOTE: We should first check `DESIRED_CUDA` when determining `ROCM_VERSION`
if [[ -n "$DESIRED_CUDA" ]]; then
    if ! echo "${DESIRED_CUDA}"| grep "^rocm" >/dev/null 2>/dev/null; then
        export DESIRED_CUDA="rocm${DESIRED_CUDA}"
    fi
    # rocm3.7, rocm3.5.1
    ROCM_VERSION="$DESIRED_CUDA"
    echo "Using $ROCM_VERSION as determined by DESIRED_CUDA"
else
    echo "Must set DESIRED_CUDA"
    exit 1
fi

# Package directories
WHEELHOUSE_DIR="wheelhouse$ROCM_VERSION"
LIBTORCH_HOUSE_DIR="libtorch_house$ROCM_VERSION"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
    if [[ -z "$BUILD_PYTHONLESS" ]]; then
        PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhouse$ROCM_VERSION"
    else
        PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_house$ROCM_VERSION"
    fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

# To make version comparison easier, create an integer representation.
ROCM_VERSION_CLEAN=$(echo ${ROCM_VERSION} | sed s/rocm//)
save_IFS="$IFS"
IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION_CLEAN})
IFS="$save_IFS"
if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=0
elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
    ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
else
    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
    exit 1
fi
ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
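# Worked example of the integer encoding above:
#   rocm6.2.1 -> ROCM_VERSION_ARRAY=(6 2 1) -> ROCM_INT=60201
#   rocm6.1   -> patch defaults to 0        -> ROCM_INT=60100
# so the feature gates below can use plain numeric comparisons.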

# Required ROCm libraries
ROCM_SO_FILES=(
    "libMIOpen.so"
    "libamdhip64.so"
    "libhipblas.so"
    "libhipfft.so"
    "libhiprand.so"
    "libhipsolver.so"
    "libhipsparse.so"
    "libhsa-runtime64.so"
    "libamd_comgr.so"
    "libmagma.so"
    "librccl.so"
    "librocblas.so"
    "librocfft.so"
    "librocm_smi64.so"
    "librocrand.so"
    "librocsolver.so"
    "librocsparse.so"
    "libroctracer64.so"
    "libroctx64.so"
    "libhipblaslt.so"
    "libhiprtc.so"
)

if [[ $ROCM_INT -ge 60100 ]]; then
    ROCM_SO_FILES+=("librocprofiler-register.so")
fi

if [[ $ROCM_INT -ge 60200 ]]; then
    ROCM_SO_FILES+=("librocm-core.so")
fi

OS_NAME=`awk -F= '/^NAME/{print $2}' /etc/os-release`
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
    LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
    LIBNUMA_PATH="/usr/lib64/libnuma.so.1"
    LIBELF_PATH="/usr/lib64/libelf.so.1"
    LIBTINFO_PATH="/usr/lib64/libtinfo.so.5"
    LIBDRM_PATH="/opt/amdgpu/lib64/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/opt/amdgpu/lib64/libdrm_amdgpu.so.1"
    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBSUITESPARSE_CONFIG_PATH="/lib64/libsuitesparseconfig.so.4"
        LIBCHOLMOD_PATH="/lib64/libcholmod.so.2"
        # Below libs are direct dependencies of libcholmod
        LIBAMD_PATH="/lib64/libamd.so.2"
        LIBCAMD_PATH="/lib64/libcamd.so.2"
        LIBCCOLAMD_PATH="/lib64/libccolamd.so.2"
        LIBCOLAMD_PATH="/lib64/libcolamd.so.2"
        LIBSATLAS_PATH="/lib64/atlas/libsatlas.so.3"
        # Below libs are direct dependencies of libsatlas
        LIBGFORTRAN_PATH="/lib64/libgfortran.so.3"
        LIBQUADMATH_PATH="/lib64/libquadmath.so.0"
    fi
    MAYBE_LIB64=lib64
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
    LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
    LIBNUMA_PATH="/usr/lib/x86_64-linux-gnu/libnuma.so.1"
    LIBELF_PATH="/usr/lib/x86_64-linux-gnu/libelf.so.1"
    if [[ $ROCM_INT -ge 50300 ]]; then
        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.6"
    else
        LIBTINFO_PATH="/lib/x86_64-linux-gnu/libtinfo.so.5"
    fi
    LIBDRM_PATH="/usr/lib/x86_64-linux-gnu/libdrm.so.2"
    LIBDRM_AMDGPU_PATH="/usr/lib/x86_64-linux-gnu/libdrm_amdgpu.so.1"
    if [[ $ROCM_INT -ge 60100 ]]; then
        # Below libs are direct dependencies of libhipsolver
        LIBCHOLMOD_PATH="/lib/x86_64-linux-gnu/libcholmod.so.3"
        # Below libs are direct dependencies of libcholmod
        LIBSUITESPARSE_CONFIG_PATH="/lib/x86_64-linux-gnu/libsuitesparseconfig.so.5"
        LIBAMD_PATH="/lib/x86_64-linux-gnu/libamd.so.2"
        LIBCAMD_PATH="/lib/x86_64-linux-gnu/libcamd.so.2"
        LIBCCOLAMD_PATH="/lib/x86_64-linux-gnu/libccolamd.so.2"
        LIBCOLAMD_PATH="/lib/x86_64-linux-gnu/libcolamd.so.2"
        LIBMETIS_PATH="/lib/x86_64-linux-gnu/libmetis.so.5"
        LIBLAPACK_PATH="/lib/x86_64-linux-gnu/liblapack.so.3"
        LIBBLAS_PATH="/lib/x86_64-linux-gnu/libblas.so.3"
        # Below libs are direct dependencies of libblas
        LIBGFORTRAN_PATH="/lib/x86_64-linux-gnu/libgfortran.so.5"
        LIBQUADMATH_PATH="/lib/x86_64-linux-gnu/libquadmath.so.0"
    fi
    MAYBE_LIB64=lib
fi
OS_SO_PATHS=($LIBGOMP_PATH $LIBNUMA_PATH\
             $LIBELF_PATH $LIBTINFO_PATH\
             $LIBDRM_PATH $LIBDRM_AMDGPU_PATH\
             $LIBSUITESPARSE_CONFIG_PATH\
             $LIBCHOLMOD_PATH $LIBAMD_PATH\
             $LIBCAMD_PATH $LIBCCOLAMD_PATH\
             $LIBCOLAMD_PATH $LIBSATLAS_PATH\
             $LIBGFORTRAN_PATH $LIBQUADMATH_PATH\
             $LIBMETIS_PATH $LIBLAPACK_PATH\
             $LIBBLAS_PATH)
OS_SO_FILES=()
for lib in "${OS_SO_PATHS[@]}"
do
    file_name="${lib##*/}" # Substring removal of path to get filename
    OS_SO_FILES[${#OS_SO_FILES[@]}]=$file_name # Append lib to array
done

# PyTorch-version specific
# AOTriton dependency only for PyTorch >= 2.4
if (( $(echo "${PYTORCH_VERSION} 2.4" | awk '{print ($1 >= $2)}') )); then
    ROCM_SO_FILES+=("libaotriton_v2.so")
fi

# rocBLAS library files
ROCBLAS_LIB_SRC=$ROCM_HOME/lib/rocblas/library
ROCBLAS_LIB_DST=lib/rocblas/library
ARCH=$(echo $PYTORCH_ROCM_ARCH | sed 's/;/|/g') # Replace the ;-separated arch list with | so it can be used as a grep pattern
ARCH_SPECIFIC_FILES=$(ls $ROCBLAS_LIB_SRC | grep -E $ARCH)
OTHER_FILES=$(ls $ROCBLAS_LIB_SRC | grep -v gfx)
ROCBLAS_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)
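# Example of the pattern rewrite above (arch list hypothetical):
#   PYTORCH_ROCM_ARCH="gfx90a;gfx942" -> ARCH="gfx90a|gfx942"
# so `grep -E $ARCH` selects the per-arch kernel files while `grep -v gfx`
# keeps the architecture-independent ones.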
|
||||
# hipblaslt library files
|
||||
HIPBLASLT_LIB_SRC=$ROCM_HOME/lib/hipblaslt/library
|
||||
HIPBLASLT_LIB_DST=lib/hipblaslt/library
|
||||
ARCH_SPECIFIC_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -E $ARCH)
|
||||
OTHER_FILES=$(ls $HIPBLASLT_LIB_SRC | grep -v gfx)
|
||||
HIPBLASLT_LIB_FILES=($ARCH_SPECIFIC_FILES $OTHER_FILES)
|
||||
|
||||
# ROCm library files
ROCM_SO_PATHS=()
for lib in "${ROCM_SO_FILES[@]}"
do
file_path=($(find $ROCM_HOME/lib/ -name "$lib")) # First search in lib
if [[ -z $file_path ]]; then
if [ -d "$ROCM_HOME/lib64/" ]; then
file_path=($(find $ROCM_HOME/lib64/ -name "$lib")) # Then search in lib64
fi
fi
if [[ -z $file_path ]]; then
file_path=($(find $ROCM_HOME/ -name "$lib")) # Then search in ROCM_HOME
fi
if [[ -z $file_path ]]; then
echo "Error: Library file $lib is not found." >&2
exit 1
fi
ROCM_SO_PATHS[${#ROCM_SO_PATHS[@]}]="$file_path" # Append lib to array
done

DEPS_LIST=(
${ROCM_SO_PATHS[*]}
${OS_SO_PATHS[*]}
)

DEPS_SONAME=(
${ROCM_SO_FILES[*]}
${OS_SO_FILES[*]}
)

DEPS_AUX_SRCLIST=(
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_SRC/}"
"${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_SRC/}"
"/opt/amdgpu/share/libdrm/amdgpu.ids"
)

DEPS_AUX_DSTLIST=(
"${ROCBLAS_LIB_FILES[@]/#/$ROCBLAS_LIB_DST/}"
"${HIPBLASLT_LIB_FILES[@]/#/$HIPBLASLT_LIB_DST/}"
"share/libdrm/amdgpu.ids"
)

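# For reference, "${files[@]/#/$prefix}" prepends the prefix to every array
# element; a minimal sketch (file names are illustrative):
#   files=(a.dat b.dat)
#   echo "${files[@]/#/lib/rocblas/library/}"
#   # -> lib/rocblas/library/a.dat lib/rocblas/library/b.dat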
# MIOpen library files
MIOPEN_SHARE_SRC=$ROCM_HOME/share/miopen/db
MIOPEN_SHARE_DST=share/miopen/db
MIOPEN_SHARE_FILES=($(ls $MIOPEN_SHARE_SRC | grep -E $ARCH))
DEPS_AUX_SRCLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_SRC/})
DEPS_AUX_DSTLIST+=(${MIOPEN_SHARE_FILES[@]/#/$MIOPEN_SHARE_DST/})

# RCCL library files
RCCL_SHARE_SRC=$ROCM_HOME/share/rccl/msccl-algorithms
RCCL_SHARE_DST=share/rccl/msccl-algorithms
RCCL_SHARE_FILES=($(ls $RCCL_SHARE_SRC))
DEPS_AUX_SRCLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_SRC/})
DEPS_AUX_DSTLIST+=(${RCCL_SHARE_FILES[@]/#/$RCCL_SHARE_DST/})

echo "PYTORCH_ROCM_ARCH: ${PYTORCH_ROCM_ARCH}"
|
||||
|
||||
SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
|
||||
if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
||||
BUILD_SCRIPT=build_common.sh
|
||||
else
|
||||
BUILD_SCRIPT=build_libtorch.sh
|
||||
fi
|
||||
source $SCRIPTPATH/${BUILD_SCRIPT}
|
||||
@ -1,108 +0,0 @@
#!/usr/bin/env bash

set -ex

export TH_BINARY_BUILD=1
export USE_CUDA=0

# Keep an array of cmake variables to add to
if [[ -z "$CMAKE_ARGS" ]]; then
# These are passed to tools/build_pytorch_libs.sh::build()
CMAKE_ARGS=()
fi
if [[ -z "$EXTRA_CAFFE2_CMAKE_FLAGS" ]]; then
# These are passed to tools/build_pytorch_libs.sh::build_caffe2()
EXTRA_CAFFE2_CMAKE_FLAGS=()
fi


# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
source /opt/intel/oneapi/compiler/latest/env/vars.sh
source /opt/intel/oneapi/pti/latest/env/vars.sh
source /opt/intel/oneapi/umf/latest/env/vars.sh
export USE_STATIC_MKL=1

WHEELHOUSE_DIR="wheelhousexpu"
LIBTORCH_HOUSE_DIR="libtorch_housexpu"
if [[ -z "$PYTORCH_FINAL_PACKAGE_DIR" ]]; then
if [[ -z "$BUILD_PYTHONLESS" ]]; then
PYTORCH_FINAL_PACKAGE_DIR="/remote/wheelhousexpu"
else
PYTORCH_FINAL_PACKAGE_DIR="/remote/libtorch_housexpu"
fi
fi
mkdir -p "$PYTORCH_FINAL_PACKAGE_DIR" || true

OS_NAME=$(awk -F= '/^NAME/{print $2}' /etc/os-release)
if [[ "$OS_NAME" == *"CentOS Linux"* ]]; then
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Red Hat Enterprise Linux"* ]]; then
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"AlmaLinux"* ]]; then
LIBGOMP_PATH="/usr/lib64/libgomp.so.1"
elif [[ "$OS_NAME" == *"Ubuntu"* ]]; then
if [[ "$(uname -m)" == "s390x" ]]; then
LIBGOMP_PATH="/usr/lib/s390x-linux-gnu/libgomp.so.1"
else
LIBGOMP_PATH="/usr/lib/x86_64-linux-gnu/libgomp.so.1"
fi
fi

DEPS_LIST=(
"$LIBGOMP_PATH"
"/opt/intel/oneapi/compiler/latest/lib/libOpenCL.so.1"
)

DEPS_SONAME=(
"libgomp.so.1"
"libOpenCL.so.1"
)

if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
|
||||
echo "Bundling with xpu support package libs."
|
||||
DEPS_LIST+=(
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsycl.so.8"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_loader.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_adapter_level_zero.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libur_adapter_opencl.so.0"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libsvml.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libirng.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libimf.so"
|
||||
"/opt/intel/oneapi/compiler/latest/lib/libintlc.so.5"
|
||||
"/opt/intel/oneapi/pti/latest/lib/libpti_view.so.0.10"
|
||||
"/opt/intel/oneapi/umf/latest/lib/libumf.so.0"
|
||||
"/opt/intel/oneapi/tcm/latest/lib/libhwloc.so.15"
|
||||
)
|
||||
DEPS_SONAME+=(
|
||||
"libsycl.so.8"
|
||||
"libur_loader.so.0"
|
||||
"libur_adapter_level_zero.so.0"
|
||||
"libur_adapter_opencl.so.0"
|
||||
"libsvml.so"
|
||||
"libirng.so"
|
||||
"libimf.so"
|
||||
"libintlc.so.5"
|
||||
"libpti_view.so.0.10"
|
||||
"libumf.so.0"
|
||||
"libhwloc.so.15"
|
||||
)
|
||||
else
|
||||
echo "Using xpu runtime libs from pypi."
|
||||
XPU_RPATHS=(
|
||||
'$ORIGIN/../../../..'
|
||||
)
|
||||
XPU_RPATHS=$(IFS=: ; echo "${XPU_RPATHS[*]}")
|
||||
export C_SO_RPATH=$XPU_RPATHS':$ORIGIN:$ORIGIN/lib'
|
||||
export LIB_SO_RPATH=$XPU_RPATHS':$ORIGIN'
|
||||
export FORCE_RPATH="--force-rpath"
|
||||
fi
|
||||
|
||||
rm -rf /usr/local/cuda*

SOURCE_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null && pwd )"
if [[ -z "$BUILD_PYTHONLESS" ]]; then
BUILD_SCRIPT=build_common.sh
else
BUILD_SCRIPT=build_libtorch.sh
fi
source ${SOURCE_DIR}/${BUILD_SCRIPT}
@ -1,30 +0,0 @@
#!/usr/bin/env bash

# Require only one python installation
if [[ -z "$DESIRED_PYTHON" ]]; then
echo "Need to set DESIRED_PYTHON env variable"
exit 1
fi

# If given a python version like 3.6m or 2.7mu, convert this to the format we
# expect. The binary CI jobs pass in python versions like this; they also only
# ever pass one python version, so we assume that DESIRED_PYTHON is not a list
# in this case
if [[ -n "$DESIRED_PYTHON" && $DESIRED_PYTHON =~ ([0-9].[0-9]+)t ]]; then
python_digits="$(echo $DESIRED_PYTHON | tr -cd [:digit:])"
py_majmin="${DESIRED_PYTHON}"
DESIRED_PYTHON="cp${python_digits}-cp${python_digits}t"
elif [[ -n "$DESIRED_PYTHON" && "$DESIRED_PYTHON" != cp* ]]; then
python_nodot="$(echo $DESIRED_PYTHON | tr -d m.u)"
DESIRED_PYTHON="cp${python_nodot}-cp${python_nodot}"
if [[ ${python_nodot} -ge 310 ]]; then
py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:2}"
else
py_majmin="${DESIRED_PYTHON:2:1}.${DESIRED_PYTHON:3:1}"
fi
fi

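# Worked examples of the conversion above (input values are illustrative):
#   DESIRED_PYTHON=3.8   -> DESIRED_PYTHON=cp38-cp38,   py_majmin=3.8
#   DESIRED_PYTHON=3.11  -> DESIRED_PYTHON=cp311-cp311, py_majmin=3.11
#   DESIRED_PYTHON=3.13t -> DESIRED_PYTHON=cp313-cp313t (free-threaded build)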
pydir="/opt/python/$DESIRED_PYTHON"
export DESIRED_PYTHON_BIN_DIR="${pydir}/bin"
export PATH="$DESIRED_PYTHON_BIN_DIR:$PATH"
echo "Will build for Python version: ${DESIRED_PYTHON}"
@ -1,26 +0,0 @@
#!/usr/bin/env bash
set -e

yum install -y wget git

rm -rf /usr/local/cuda*

# Install Anaconda
if ! ls /py
then
echo "Miniconda needs to be installed"
wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh
bash ~/miniconda.sh -b -p /py
else
echo "Miniconda is already installed"
fi

export PATH="/py/bin:$PATH"

# Anaconda token
if ls /remote/token
then
source /remote/token
fi

conda install -y conda-build anaconda-client
@ -178,7 +178,7 @@ fi
# sccache will fail for CUDA builds if all cores are used for compiling
# gcc 7 with sccache seems to have intermittent OOM issue if all cores are used
if [ -z "$MAX_JOBS" ]; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; } && which sccache > /dev/null; then
if { [[ "$BUILD_ENVIRONMENT" == *cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *gcc7* ]]; } && which sccache > /dev/null; then
export MAX_JOBS=$(($(nproc) - 1))
fi
fi
@ -203,12 +203,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
fi

if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
export USE_CUDA=1
fi
export LDSHARED="clang --shared"
export USE_CUDA=0
export USE_ASAN=1
export REL_WITH_DEB_INFO=1
export UBSAN_FLAGS="-fno-sanitize-recover=all"
export UBSAN_FLAGS="-fno-sanitize-recover=all;-fno-sanitize=float-divide-by-zero;-fno-sanitize=float-cast-overflow"
unset USE_LLVM
fi

@ -220,6 +218,10 @@ if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
export USE_PRECOMPILED_HEADERS=1
fi

if [[ "${BUILD_ENVIRONMENT}" == *linux-focal-py3.7-gcc7-build* ]]; then
export USE_GLOO_WITH_OPENSSL=ON
fi

if [[ "${BUILD_ENVIRONMENT}" != *android* && "${BUILD_ENVIRONMENT}" != *cuda* ]]; then
|
||||
export BUILD_STATIC_RUNTIME_BENCHMARK=ON
|
||||
fi
|
||||
@ -276,6 +278,7 @@ else
|
||||
# set only when building other architectures
|
||||
# or building non-XLA tests.
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
|
||||
"$BUILD_ENVIRONMENT" != *s390x* &&
|
||||
"$BUILD_ENVIRONMENT" != *xla* ]]; then
|
||||
if [[ "$BUILD_ENVIRONMENT" != *py3.8* ]]; then
|
||||
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
||||
@ -285,7 +288,8 @@ else
|
||||
WERROR=1 python setup.py clean
|
||||
|
||||
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||
python3 tools/packaging/split_wheel.py bdist_wheel
|
||||
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 python setup.py bdist_wheel
|
||||
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 python setup.py bdist_wheel --cmake
|
||||
else
|
||||
WERROR=1 python setup.py bdist_wheel
|
||||
fi
|
||||
@ -397,6 +401,8 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
|
||||
python tools/stats/export_test_times.py
|
||||
fi
|
||||
|
||||
if [[ "$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||
# snadampal: skipping it till sccache support added for aarch64
|
||||
# https://github.com/pytorch/pytorch/issues/121559
|
||||
if [[ "$BUILD_ENVIRONMENT" != *aarch64* && "$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||
print_sccache_stats
|
||||
fi
|
||||
|
||||
@ -6,12 +6,6 @@ if [[ "$BUILD_ENVIRONMENT" != *win-* ]]; then
# Save the absolute path in case later we chdir (as occurs in the gpu perf test)
script_dir="$( cd "$(dirname "${BASH_SOURCE[0]}")" || exit ; pwd -P )"

if [[ "${BUILD_ENVIRONMENT}" == *-pch* ]]; then
# This is really weird, but newer sccache somehow produces broken binary
# see https://github.com/pytorch/pytorch/issues/139188
sudo mv /opt/cache/bin/sccache-0.2.14a /opt/cache/bin/sccache
fi

if which sccache > /dev/null; then
# Save sccache logs to file
sccache --stop-server > /dev/null 2>&1 || true

@ -81,15 +81,14 @@ function pip_install_whl() {

function pip_install() {
# retry 3 times
pip_install_pkg="python3 -m pip install --progress-bar off"
${pip_install_pkg} "$@" || \
${pip_install_pkg} "$@" || \
${pip_install_pkg} "$@"
# old versions of pip don't have the "--progress-bar" flag
pip install --progress-bar off "$@" || pip install --progress-bar off "$@" || pip install --progress-bar off "$@" ||\
pip install "$@" || pip install "$@" || pip install "$@"
}

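# The retry works because `a || b` only runs b when a exits non-zero, so a
# successful install short-circuits the chain; e.g. (package is illustrative):
#   pip_install numpy==2.0.2   # attempted up to three times before giving up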
function pip_uninstall() {
# uninstall 2 times
pip3 uninstall -y "$@" || pip3 uninstall -y "$@"
pip uninstall -y "$@" || pip uninstall -y "$@"
}

function get_exit_code() {
@ -105,9 +104,9 @@ function get_bazel() {
# version of Bazelisk to fetch the platform specific version of
# Bazel to use from .bazelversion.
retry curl --location --output tools/bazel \
https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.23.0/bazelisk.py
https://raw.githubusercontent.com/bazelbuild/bazelisk/v1.16.0/bazelisk.py
shasum --algorithm=1 --check \
<(echo '01df9cf7f08dd80d83979ed0d0666a99349ae93c tools/bazel')
<(echo 'd4369c3d293814d3188019c9f7527a948972d9f8 tools/bazel')
chmod u+x tools/bazel
}

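# The shasum check aborts the build if the downloaded bazelisk.py does not
# match the pinned SHA-1; an equivalent standalone check, mirroring the pinned
# line above:
#   echo '01df9cf7f08dd80d83979ed0d0666a99349ae93c tools/bazel' | shasum --algorithm=1 --check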
@ -241,12 +240,6 @@ function checkout_install_torchbench() {
popd
}

function install_torchao() {
local commit
commit=$(get_pinned_commit torchao)
pip_install --no-use-pep517 --user "git+https://github.com/pytorch/ao.git@${commit}"
}

function print_sccache_stats() {
echo 'PyTorch Build Statistics'
sccache --show-stats

@ -45,7 +45,8 @@ def create_cert(path, C, ST, L, O, key):
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.now(timezone.utc) + timedelta(days=10)
datetime.now(timezone.utc)
+ timedelta(days=10)
)
.add_extension(
x509.BasicConstraints(ca=True, path_length=None),
@ -90,7 +91,8 @@ def sign_certificate_request(path, csr_cert, ca_cert, private_ca_key):
.not_valid_before(datetime.now(timezone.utc))
.not_valid_after(
# Our certificate will be valid for 10 days
datetime.now(timezone.utc) + timedelta(days=10)
datetime.now(timezone.utc)
+ timedelta(days=10)
# Sign our certificate with our private key
)
.sign(private_ca_key, hashes.SHA256())

@ -1,5 +1,4 @@
#!/bin/bash
set -x

# shellcheck disable=SC2034
# shellcheck source=./macos-common.sh
@ -149,153 +148,21 @@ test_jit_hooks() {
assert_git_not_dirty
}

torchbench_setup_macos() {
git clone --recursive https://github.com/pytorch/vision torchvision
git clone --recursive https://github.com/pytorch/audio torchaudio

pushd torchvision
git fetch
git checkout "$(cat ../.github/ci_commit_pins/vision.txt)"
git submodule update --init --recursive
python setup.py clean
python setup.py develop
popd

pushd torchaudio
git fetch
git checkout "$(cat ../.github/ci_commit_pins/audio.txt)"
git submodule update --init --recursive
python setup.py clean
python setup.py develop
popd

# Shellcheck doesn't like it when you pass no arguments to a function that can take args. See https://www.shellcheck.net/wiki/SC2120
# shellcheck disable=SC2119,SC2120
checkout_install_torchbench
}

conda_benchmark_deps() {
conda install -y astunparse numpy scipy ninja pyyaml setuptools cmake typing-extensions requests protobuf numba cython scikit-learn
conda install -y -c conda-forge librosa
}


test_torchbench_perf() {
print_cmake_info

echo "Launching torchbench setup"
conda_benchmark_deps
torchbench_setup_macos

TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"

echo "Setup complete, launching torchbench training performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"

echo "Launching torchbench inference performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"

echo "Pytorch benchmark on mps device completed"
}

test_torchbench_smoketest() {
print_cmake_info

echo "Launching torchbench setup"
conda_benchmark_deps
# shellcheck disable=SC2119,SC2120
torchbench_setup_macos

TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
touch "$TEST_REPORTS_DIR"/torchbench_training.csv
touch "$TEST_REPORTS_DIR"/torchbench_inference.csv

echo "Setup complete, launching torchbench training performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --training --devices mps --output "$TEST_REPORTS_DIR/torchbench_training.csv"

echo "Launching torchbench inference performance run"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_T5 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only llama --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only BERT_pytorch --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only dcgan --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only hf_GPT2 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only yolov3 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py --performance --only resnet152 --backend eager --inference --devices mps --output "$TEST_REPORTS_DIR/torchbench_inference.csv"

echo "Pytorch benchmark on mps device completed"
}

test_hf_perf() {
print_cmake_info
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
conda_benchmark_deps
torchbench_setup_macos

echo "Launching HuggingFace training perf run"
python "$(pwd)"/benchmarks/dynamo/huggingface.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/hf_training.csv

echo "Launching HuggingFace inference perf run"
python "$(pwd)"/benchmarks/dynamo/huggingface.py --backend eager --device mps --performance --inference --output="${TEST_REPORTS_DIR}"/hf_inference.csv

echo "HuggingFace benchmark on mps device completed"
}

test_timm_perf() {
print_cmake_info
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"
conda_benchmark_deps
torchbench_setup_macos

echo "Launching timm training perf run"
python "$(pwd)"/benchmarks/dynamo/timm_models.py --backend eager --device mps --performance --training --output="${TEST_REPORTS_DIR}"/timm_training.csv

echo "Launching timm inference perf run"
python "$(pwd)"/benchmarks/dynamo/timm_models.py --backend eager --device mps --performance --inference --output="${TEST_REPORTS_DIR}"/timm_inference.csv

echo "timm benchmark on mps device completed"
}

install_tlparse

if [[ $TEST_CONFIG == *"test_mps"* ]]; then
if [[ $NUM_TEST_SHARDS -gt 1 ]]; then
test_python_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_libtorch
test_custom_script_ops
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
test_jit_hooks
test_custom_backend
fi
else
test_python_all
if [[ $NUM_TEST_SHARDS -gt 1 ]]; then
test_python_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_libtorch
test_custom_script_ops
elif [[ "${SHARD_NUMBER}" == 2 ]]; then
test_jit_hooks
test_custom_backend
fi
fi

if [[ $TEST_CONFIG == *"perf_all"* ]]; then
test_torchbench_perf
test_hf_perf
test_timm_perf
elif [[ $TEST_CONFIG == *"perf_torchbench"* ]]; then
test_torchbench_perf
elif [[ $TEST_CONFIG == *"perf_hf"* ]]; then
test_hf_perf
elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then
test_timm_perf
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
test_torchbench_smoketest
else
test_python_all
test_libtorch
test_custom_script_ops
test_jit_hooks
test_custom_backend
fi

@ -48,17 +48,17 @@ NUM_TEST_SHARDS="${NUM_TEST_SHARDS:=1}"

export VALGRIND=ON
# export TORCH_INDUCTOR_INSTALL_GXX=ON
if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
# clang9 appears to miscompile code involving std::optional<c10::SymInt>,
if [[ "$BUILD_ENVIRONMENT" == *clang9* ]]; then
# clang9 appears to miscompile code involving c10::optional<c10::SymInt>,
# such that valgrind complains along these lines:
#
# Conditional jump or move depends on uninitialised value(s)
# at 0x40303A: ~optional_base (Optional.h:281)
# by 0x40303A: call (Dispatcher.h:448)
# by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:10)
# by 0x40303A: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:10)
# by 0x403700: main (basic.cpp:16)
# Uninitialised value was created by a stack allocation
# at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, std::optional<c10::SymInt>) (basic.cpp:6)
# at 0x402AAA: call(at::Tensor const&, c10::ArrayRef<c10::SymInt>, c10::ArrayRef<c10::SymInt>, c10::optional<c10::SymInt>) (basic.cpp:6)
#
# The problem does not appear with gcc or newer versions of clang (we tested
# clang14). So we suppress valgrind testing for clang9 specifically.
@ -72,7 +72,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
#
# using namespace at;
#
# Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, std::optional<c10::SymInt> storage_offset) {
# Tensor call(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, c10::optional<c10::SymInt> storage_offset) {
# auto op = c10::Dispatcher::singleton()
# .findSchemaOrThrow(at::_ops::as_strided::name, at::_ops::as_strided::overload_name)
# .typed<at::_ops::as_strided::schema>();
@ -81,7 +81,7 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
#
# int main(int argv) {
# Tensor b = empty({3, 4});
# auto z = call(b, b.sym_sizes(), b.sym_strides(), std::nullopt);
# auto z = call(b, b.sym_sizes(), b.sym_strides(), c10::nullopt);
# }
export VALGRIND=OFF
fi
@ -169,13 +169,9 @@ fi

if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
# Source Intel oneAPI environment script to enable xpu runtime related libraries
# refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
# refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html
# shellcheck disable=SC1091
source /opt/intel/oneapi/compiler/latest/env/vars.sh
if [ -f /opt/intel/oneapi/umf/latest/env/vars.sh ]; then
# shellcheck disable=SC1091
source /opt/intel/oneapi/umf/latest/env/vars.sh
fi
# Check XPU status before testing
xpu-smi discovery
fi
@ -200,9 +196,6 @@ install_tlparse
# ASAN test is not working
if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
export ASAN_OPTIONS=detect_leaks=0:symbolize=1:detect_stack_use_after_return=true:strict_init_order=true:detect_odr_violation=1:detect_container_overflow=0:check_initialization_order=true:debug=true
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
export ASAN_OPTIONS="${ASAN_OPTIONS}:protect_shadow_gap=0"
fi
export UBSAN_OPTIONS=print_stacktrace=1:suppressions=$PWD/ubsan.supp
export PYTORCH_TEST_WITH_ASAN=1
export PYTORCH_TEST_WITH_UBSAN=1
@ -240,8 +233,8 @@ if [[ "$BUILD_ENVIRONMENT" == *asan* ]]; then
# it depends on a ton of dynamic libraries that most programs aren't gonna
# have, and it applies to child processes.

LD_PRELOAD=$(clang --print-file-name=libclang_rt.asan-x86_64.so)
export LD_PRELOAD
# TODO: get rid of the hardcoded path
export LD_PRELOAD=/usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so
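# For reference, `clang --print-file-name=...` resolves the ASAN runtime from
# the active toolchain instead of hardcoding it; the output path varies by
# toolchain, e.g. (illustrative, matching the old hardcoded value above):
#   clang --print-file-name=libclang_rt.asan-x86_64.so
#   # -> /usr/lib/llvm-15/lib/clang/15.0.7/lib/linux/libclang_rt.asan-x86_64.so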
# Disable valgrind for asan
export VALGRIND=OFF

@ -288,7 +281,7 @@ test_python_shard() {

# modify LD_LIBRARY_PATH to ensure it has the conda env.
# This set of tests has been shown to be buggy without it for the split-build
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION --upload-artifacts-while-running
time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose $PYTHON_TEST_EXTRA_OPTION

assert_git_not_dirty
}
@ -300,7 +293,7 @@ test_python() {
}


test_dynamo_wrapped_shard() {
test_dynamo_shard() {
if [[ -z "$NUM_TEST_SHARDS" ]]; then
echo "NUM_TEST_SHARDS must be defined to run a Python test shard"
exit 1
@ -314,8 +307,7 @@ test_dynamo_wrapped_shard() {
--exclude-distributed-tests \
--exclude-torch-export-tests \
--shard "$1" "$NUM_TEST_SHARDS" \
--verbose \
--upload-artifacts-while-running
--verbose
assert_git_not_dirty
}

@ -328,7 +320,6 @@ test_inductor_distributed() {
python test/run_test.py -i distributed/test_c10d_functional_native.py --verbose
python test/run_test.py -i distributed/_tensor/test_dtensor_compile.py --verbose
python test/run_test.py -i distributed/tensor/parallel/test_micro_pipeline_tp.py --verbose
python test/run_test.py -i distributed/_composable/test_replicate_with_compiler.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_comm.py --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_multi_group --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_training.py -k test_train_parity_with_activation_checkpointing --verbose
@ -340,12 +331,11 @@ test_inductor_distributed() {
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_compute_dtype --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_mixed_precision.py -k test_reduce_dtype --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_clip_grad_norm_.py -k test_clip_grad_norm_2d --verbose
python test/run_test.py -i distributed/_composable/fsdp/test_fully_shard_compile.py --verbose
python test/run_test.py -i distributed/fsdp/test_fsdp_tp_integration.py -k test_fsdp_tp_integration --verbose

# this runs on both single-gpu and multi-gpu instances. It should be smart about skipping
# tests that aren't supported or that require more GPUs than are available
python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives distributed/test_compute_comm_reordering --verbose
python test/run_test.py --include distributed/test_dynamo_distributed distributed/test_inductor_collectives --verbose
assert_git_not_dirty
}

@ -379,39 +369,21 @@ test_inductor_aoti() {
CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
}

test_inductor_cpp_wrapper() {
export TORCHINDUCTOR_CPP_WRAPPER=1
test_inductor_cpp_wrapper_abi_compatible() {
export TORCHINDUCTOR_ABI_COMPATIBLE=1
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"

# Run certain inductor unit tests with cpp wrapper. In the end state, we should be able to run all the inductor
# unit tests with cpp wrapper.
python test/run_test.py --include inductor/test_torchinductor.py --verbose
echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
PYTORCH_TESTING_DEVICE_ONLY_FOR="" python test/run_test.py --include inductor/test_cpu_cpp_wrapper
python test/run_test.py --include inductor/test_cuda_cpp_wrapper inductor/test_cpu_repro inductor/test_extension_backend


# Run inductor benchmark tests with cpp wrapper.
# Skip benchmark tests if it's in rerun-disabled-mode.
if [[ "${PYTORCH_TEST_RERUN_DISABLED_TESTS}" == "1" ]]; then
echo "skip dynamo benchmark tests for rerun-disabled-test"
else
echo "run dynamo benchmark tests with cpp wrapper"
python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/timm_models.py --device cuda --accuracy --amp \
--training --inductor --disable-cudagraphs --only vit_base_patch16_224 \
--output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"

python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"
fi
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_training.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_timm_training.csv"
}

# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
|
||||
@ -431,7 +403,7 @@ pr_time_benchmarks() {
|
||||
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks source benchmarks/dynamo/pr_time_benchmarks/benchmark_runner.sh "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "benchmarks/dynamo/pr_time_benchmarks/benchmarks"
|
||||
echo "benchmark results on current PR: "
|
||||
cat "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
|
||||
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv" "$TEST_REPORTS_DIR/new_expected_results.csv"
|
||||
PYTHONPATH=$(pwd)/benchmarks/dynamo/pr_time_benchmarks python benchmarks/dynamo/pr_time_benchmarks/check_results.py "benchmarks/dynamo/pr_time_benchmarks/expected_results.csv" "$TEST_REPORTS_DIR/pr_time_benchmarks_results.csv"
|
||||
}
|
||||
|
||||
if [[ "${TEST_CONFIG}" == *pr_time_benchmarks* ]]; then
|
||||
@ -487,8 +459,6 @@ test_perf_for_dashboard() {
|
||||
device=cuda_a10g
|
||||
fi
|
||||
|
||||
export TORCHINDUCTOR_CPP_WRAPPER=1

for mode in "${modes[@]}"; do
if [[ "$mode" == "inference" ]]; then
dtype=bfloat16
@ -541,7 +511,7 @@ test_perf_for_dashboard() {
"${target_flag[@]}" --"$mode" --"$dtype" --export --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_export_${suite}_${dtype}_${mode}_${device}_${target}.csv"
fi
$TASKSET python "benchmarks/dynamo/$suite.py" \
TORCHINDUCTOR_ABI_COMPATIBLE=1 $TASKSET python "benchmarks/dynamo/$suite.py" \
"${target_flag[@]}" --"$mode" --"$dtype" --export-aot-inductor --disable-cudagraphs "$@" \
--output "$TEST_REPORTS_DIR/${backend}_aot_inductor_${suite}_${dtype}_${mode}_${device}_${target}.csv"
fi
@ -596,6 +566,13 @@ test_single_dynamo_benchmark() {
test_perf_for_dashboard "$suite" \
"${DYNAMO_BENCHMARK_FLAGS[@]}" "$@" "${partition_flags[@]}"
else
if [[ "${TEST_CONFIG}" == *aot_inductor* && "${TEST_CONFIG}" != *cpu_aot_inductor* ]]; then
# Test AOTInductor with the ABI-compatible mode on CI
# This can be removed once the ABI-compatible mode becomes default.
# For the CPU device, we prefer the non-ABI-compatible mode on CI when testing AOTInductor.
export TORCHINDUCTOR_ABI_COMPATIBLE=1
fi

if [[ "${TEST_CONFIG}" == *_avx2* ]]; then
TEST_CONFIG=${TEST_CONFIG//_avx2/}
fi
@ -617,11 +594,6 @@ test_single_dynamo_benchmark() {
}

test_inductor_micro_benchmark() {
# torchao requires cuda 8.0 or above for bfloat16 support
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
export TORCH_CUDA_ARCH_LIST="8.0;8.6"
fi
install_torchao
TEST_REPORTS_DIR=$(pwd)/test/test-reports
if [[ "${TEST_CONFIG}" == *cpu* ]]; then
test_inductor_set_cpu_affinity
@ -676,6 +648,17 @@ test_inductor_torchbench_smoketest_perf() {
TEST_REPORTS_DIR=$(pwd)/test/test-reports
mkdir -p "$TEST_REPORTS_DIR"

# Test some models in the cpp wrapper mode
TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only hf_T5 --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only llama --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_CPP_WRAPPER=1 python benchmarks/dynamo/torchbench.py --device cuda --accuracy \
--bfloat16 --inference --inductor --only moco --output "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/inductor_cpp_wrapper_inference.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/inductor_torchbench_inference.csv"

python benchmarks/dynamo/torchbench.py --device cuda --performance --backend inductor --float16 --training \
--batch-size-file "$(realpath benchmarks/dynamo/torchbench_models_list.txt)" --only hf_Bert \
--output "$TEST_REPORTS_DIR/inductor_training_smoketest.csv"
@ -765,9 +748,19 @@ test_inductor_torchbench_cpu_smoketest_perf(){
fi
cat "$output_name"
# The threshold value needs to be actively maintained to make this check useful.
# Allow 1% variance for CPU perf to accommodate perf fluctuation
python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target" -s 0.99
python benchmarks/dynamo/check_perf_csv.py -f "$output_name" -t "$speedup_target"
done

# Add a few ABI-compatible accuracy tests for CPU. These can be removed once we turn on ABI-compatible as default.
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
--bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only adv_inception_v3 \
--output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
TORCHINDUCTOR_ABI_COMPATIBLE=1 python benchmarks/dynamo/timm_models.py --device cpu --accuracy \
--bfloat16 --inference --export-aot-inductor --disable-cudagraphs --only beit_base_patch16_224 \
--output "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv"
python benchmarks/dynamo/check_accuracy.py \
--actual "$TEST_REPORTS_DIR/aot_inductor_smoke_test.csv" \
--expected "benchmarks/dynamo/ci_expected_accuracy/aot_inductor_timm_inference.csv"
}

test_torchbench_gcp_smoketest(){
@ -825,7 +818,7 @@ test_without_numpy() {
# Regression test for https://github.com/pytorch/pytorch/issues/66353
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;print(torch.tensor([torch.tensor(0.), torch.tensor(1.)]))"
# Regression test for https://github.com/pytorch/pytorch/issues/109387
if [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
if [[ "${TEST_CONFIG}" == *dynamo* ]]; then
python -c "import sys;sys.path.insert(0, 'fake_numpy');import torch;torch.compile(lambda x:print(x))('Hello World')"
fi
popd
@ -959,9 +952,6 @@ test_distributed() {
python test/run_test.py --cpp --verbose -i cpp/HashStoreTest
python test/run_test.py --cpp --verbose -i cpp/TCPStoreTest

echo "Testing multi-GPU linalg tests"
python test/run_test.py -i test_linalg.py -k test_matmul_offline_mgpu_tunable --verbose

if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
MPIEXEC=$(command -v mpiexec)
if [[ -n "$MPIEXEC" ]]; then
@ -1211,7 +1201,7 @@ EOF
git reset --hard "${SHA_TO_COMPARE}"
git submodule sync && git submodule update --init --recursive
echo "::group::Installing Torch From Base Commit"
pip3 install -r requirements.txt
pip install -r requirements.txt
# shellcheck source=./common-build.sh
source "$(dirname "${BASH_SOURCE[0]}")/common-build.sh"
python setup.py bdist_wheel --bdist-dir="base_bdist_tmp" --dist-dir="base_dist"
@ -1368,11 +1358,10 @@ test_executorch() {
export EXECUTORCH_BUILD_PYBIND=ON
export CMAKE_ARGS="-DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON"

# For llama3
bash examples/models/llama3_2_vision/install_requirements.sh
# NB: We need to rebuild ExecuTorch runner here because it depends on PyTorch
# from the PR
bash .ci/scripts/setup-linux.sh cmake
# shellcheck disable=SC1091
source .ci/scripts/setup-linux.sh cmake

echo "Run ExecuTorch unit tests"
pytest -v -n auto
@ -1382,7 +1371,7 @@ test_executorch() {
echo "Run ExecuTorch regression tests for some models"
# TODO(huydhn): Add more coverage here using ExecuTorch's gather models script
# shellcheck disable=SC1091
source .ci/scripts/test_model.sh mv3 cmake xnnpack-quantization-delegation ''
source .ci/scripts/test.sh mv3 cmake xnnpack-quantization-delegation ''

popd

@ -1395,8 +1384,7 @@ test_executorch() {

test_linux_aarch64() {
python test/run_test.py --include test_modules test_mkldnn test_mkldnn_fusion test_openmp test_torch test_dynamic_shapes \
test_transformers test_multiprocessing test_numpy_interop test_autograd test_binary_ufuncs test_complex test_spectral_ops \
test_foreach test_reductions test_unary_ufuncs \
test_transformers test_multiprocessing test_numpy_interop \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose

# Dynamo tests
@ -1414,7 +1402,6 @@ test_linux_aarch64() {
inductor/test_pattern_matcher inductor/test_perf inductor/test_profiler inductor/test_select_algorithm inductor/test_smoke \
inductor/test_split_cat_fx_passes inductor/test_standalone_compile inductor/test_torchinductor \
inductor/test_torchinductor_codegen_dynamic_shapes inductor/test_torchinductor_dynamic_shapes inductor/test_memory \
inductor/test_triton_cpu_backend inductor/test_triton_extension_backend \
--shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose
}

@ -1422,11 +1409,7 @@ if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-baze
(cd test && python -c "import torch; print(torch.__config__.show())")
(cd test && python -c "import torch; print(torch.__config__.parallel_info())")
fi
if [[ "${TEST_CONFIG}" == *numpy_2* ]]; then
# Install numpy-2.0.2 and test inductor tracing
python -mpip install --pre numpy==2.0.2
python test/run_test.py --include dynamo/test_unspec.py
elif [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
if [[ "${BUILD_ENVIRONMENT}" == *aarch64* && "${TEST_CONFIG}" != *perf_cpu_aarch64* ]]; then
test_linux_aarch64
elif [[ "${TEST_CONFIG}" == *backward* ]]; then
test_forward_backward_compatibility
@ -1470,6 +1453,7 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
else
install_torchaudio cuda
fi
install_torchtext
install_torchvision
TORCH_CUDA_ARCH_LIST="8.0;8.6" pip_install git+https://github.com/pytorch/ao.git
id=$((SHARD_NUMBER-1))
@ -1495,11 +1479,9 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
fi
PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
install_torchaudio cuda
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
install_torchvision
checkout_install_torchbench hf_T5 llama moco
PYTHONPATH=$(pwd)/torchbench test_inductor_cpp_wrapper
test_inductor_cpp_wrapper_abi_compatible
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
install_torchvision
test_inductor_shard "${SHARD_NUMBER}"
@ -1508,9 +1490,9 @@ elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
test_inductor_distributed
fi
fi
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
elif [[ "${TEST_CONFIG}" == *dynamo* ]]; then
install_torchvision
test_dynamo_wrapped_shard "${SHARD_NUMBER}"
test_dynamo_shard "${SHARD_NUMBER}"
if [[ "${SHARD_NUMBER}" == 1 ]]; then
test_aten
fi

@ -52,8 +52,7 @@ if not errorlevel 0 goto fail

if "%USE_XPU%"=="1" (
:: Activate xpu environment - VS env is required for xpu
call "C:\Program Files (x86)\Intel\oneAPI\compiler\latest\env\vars.bat"
call "C:\Program Files (x86)\Intel\oneAPI\ocloc\latest\env\vars.bat"
call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat"
if errorlevel 1 exit /b 1
:: Reduce build time. Only have MTL self-hosted runner now
SET TORCH_XPU_ARCH_LIST=xe-lpg

@ -1,6 +1,6 @@
@echo on
REM Description: Install Intel Support Packages on Windows
REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
REM BKM reference: https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpu/2-5.html

set XPU_INSTALL_MODE=%~1
if "%XPU_INSTALL_MODE%"=="" goto xpu_bundle_install_start
@ -28,28 +28,15 @@ if "%XPU_INSTALL_MODE%"=="driver" goto xpu_install_end

set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-for-pytorch-gpu-dev_p_0.5.3.37_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
set XPU_PTI_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
set XPU_BUNDLE_VERSION=0.5.3+31
set XPU_PTI_VERSION=0.9.0+36
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.intel-for-pytorch-gpu-dev.product
set XPU_PTI_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
set XPU_BUNDLE_INSTALLED=0
set XPU_PTI_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d1a91e2-e8b8-40a5-8c7f-5db768a6a60c/w_intel-pti-dev_p_0.9.0.37_offline.exe
set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.intel-pti-dev.product
set XPU_EXTRA_VERSION=0.9.0+36
set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0

if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.0] (
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/efc86abd-cb77-452e-a03f-a741895b8ece/intel-deep-learning-essentials-2025.0.0.336_offline.exe
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
set XPU_BUNDLE_VERSION=2025.0.0+335
set XPU_BUNDLE_INSTALLED=0
set XPU_BUNDLE_UNINSTALL=0
set XPU_EXTRA_URL=NULL
set XPU_EXTRA_PRODUCT_NAME=intel.oneapi.win.compiler.product
set XPU_EXTRA_VERSION=2025.0.1+1226
set XPU_EXTRA_INSTALLED=0
set XPU_EXTRA_UNINSTALL=0
)
set XPU_PTI_UNINSTALL=0

:: Check if XPU bundle is target version or already installed
if exist "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" goto xpu_bundle_ver_check
@ -64,34 +51,25 @@ for /f "tokens=1,2" %%a in (xpu_bundle_installed_ver.log) do (
echo %%a Installed Version: %%b
set XPU_BUNDLE_INSTALLED=1
if not "%XPU_BUNDLE_VERSION%"=="%%b" (
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_BUNDLE_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
set XPU_BUNDLE_UNINSTALL=1
)
)
if "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
if "%%a"=="%XPU_PTI_PRODUCT_NAME%" (
echo %%a Installed Version: %%b
set XPU_EXTRA_INSTALLED=1
if not "%XPU_EXTRA_VERSION%"=="%%b" (
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
set XPU_EXTRA_UNINSTALL=1
set XPU_PTI_INSTALLED=1
if not "%XPU_PTI_VERSION%"=="%%b" (
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %XPU_PTI_PRODUCT_NAME% --product-ver %%b --log-dir uninstall_bundle
set XPU_PTI_UNINSTALL=1
)
)
if not "%%b" == "Version" if not [%%b]==[] if not "%%a"=="%XPU_BUNDLE_PRODUCT_NAME%" if not "%%a"=="%XPU_EXTRA_PRODUCT_NAME%" (
echo "Uninstalling...."
start /wait "Installer Title" "%XPU_BUNDLE_PARENT_DIR%\Installer\installer.exe" --action=remove --eula=accept --silent --product-id %%a --product-ver %%b --log-dir uninstall_bundle
)
)
if errorlevel 1 exit /b 1
if exist xpu_bundle_installed_ver.log del xpu_bundle_installed_ver.log
if exist uninstall_bundle rmdir /s /q uninstall_bundle
if "%XPU_BUNDLE_INSTALLED%"=="0" goto xpu_bundle_install
if "%XPU_BUNDLE_UNINSTALL%"=="1" goto xpu_bundle_install

:xpu_extra_check

if "%XPU_EXTRA_URL%"=="NULL" goto xpu_install_end
if "%XPU_EXTRA_INSTALLED%"=="0" goto xpu_extra_install
if "%XPU_EXTRA_UNINSTALL%"=="1" goto xpu_extra_install
if "%XPU_PTI_INSTALLED%"=="0" goto xpu_pti_install
if "%XPU_PTI_UNINSTALL%"=="1" goto xpu_pti_install
goto xpu_install_end

:xpu_bundle_install
@ -101,14 +79,13 @@ echo "XPU Bundle installing..."
start /wait "Intel Pytorch Bundle Installer" "xpu_bundle.exe" --action=install --eula=accept --silent --log-dir install_bundle
if errorlevel 1 exit /b 1
del xpu_bundle.exe
goto xpu_extra_check

:xpu_extra_install
:xpu_pti_install

curl -o xpu_extra.exe --retry 3 --retry-all-errors -k %XPU_EXTRA_URL%
echo "Intel XPU EXTRA installing..."
start /wait "Intel XPU EXTRA Installer" "xpu_extra.exe" --action=install --eula=accept --silent --log-dir install_bundle
curl -o xpu_pti.exe --retry 3 --retry-all-errors -k %XPU_PTI_URL%
echo "XPU PTI installing..."
start /wait "Intel PTI Installer" "xpu_pti.exe" --action=install --eula=accept --silent --log-dir install_bundle
if errorlevel 1 exit /b 1
del xpu_extra.exe
del xpu_pti.exe

:xpu_install_end

@ -46,9 +46,6 @@ python -m pip install tlparse==0.3.25
# Install parameterized
python -m pip install parameterized==0.8.1

# Install pulp for testing ilps under torch\distributed\_tools
python -m pip install pulp==2.9.0

run_tests() {
# Run nvidia-smi if available
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do

@ -14,7 +14,7 @@ mkdir -p ${ZIP_DIR}/src
cp -R ${ARTIFACTS_DIR}/arm64/include ${ZIP_DIR}/install/
# build a FAT binary
cd ${ZIP_DIR}/install/lib
target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpthreadpool.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a libXNNPACK.a libmicrokernels-prod.a)
target_libs=(libc10.a libclog.a libcpuinfo.a libeigen_blas.a libpthreadpool.a libpytorch_qnnpack.a libtorch_cpu.a libtorch.a libXNNPACK.a)
for lib in ${target_libs[*]}
do
if [ -f "${ARTIFACTS_DIR}/x86_64/lib/${lib}" ] && [ -f "${ARTIFACTS_DIR}/arm64/lib/${lib}" ]; then

@ -80,8 +80,8 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:
# Triton wheels are only supported on Linux with Python < 3.13
TRITON_REQUIREMENT="triton==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
TRITON_REQUIREMENT="pytorch-triton==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
fi
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${PYTORCH_EXTRA_INSTALL_REQUIREMENTS} | ${TRITON_REQUIREMENT}"
fi
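# A sketch of the shorthash extraction above (hash value is illustrative):
#   echo '0123456789abcdef0123456789abcdef01234567' | cut -c1-10   # -> 0123456789
# i.e. the pinned 40-character commit SHA is truncated to its first 10
# characters for the wheel's local version tag.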
@ -90,8 +90,8 @@ fi
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*rocm.* && $(uname) == "Linux" ]]; then
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton.txt)
TRITON_REQUIREMENT="pytorch-triton-rocm==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
fi
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
@ -104,8 +104,8 @@ fi
if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*xpu.* && $(uname) == "Linux" ]]; then
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}; ${TRITON_CONSTRAINT}"
if [[ -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_BUILD_VERSION" =~ .*dev.* ]]; then
TRITON_SHORTHASH=$(cut -c1-8 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+git${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
TRITON_SHORTHASH=$(cut -c1-10 $PYTORCH_ROOT/.ci/docker/ci_commit_pins/triton-xpu.txt)
TRITON_REQUIREMENT="pytorch-triton-xpu==${TRITON_VERSION}+${TRITON_SHORTHASH}; ${TRITON_CONSTRAINT}"
fi
if [[ -z "${PYTORCH_EXTRA_INSTALL_REQUIREMENTS:-}" ]]; then
export PYTORCH_EXTRA_INSTALL_REQUIREMENTS="${TRITON_REQUIREMENT}"
@ -114,12 +114,6 @@ if [[ "$PACKAGE_TYPE" =~ .*wheel.* && -n "$PYTORCH_BUILD_VERSION" && "$PYTORCH_B
fi
fi

USE_GLOO_WITH_OPENSSL="ON"
|
||||
if [[ "$GPU_ARCH_TYPE" =~ .*aarch64.* ]]; then
|
||||
USE_GLOO_WITH_OPENSSL="OFF"
|
||||
USE_GOLD_LINKER="OFF"
|
||||
fi
|
||||
|
||||
cat >"$envfile" <<EOL
|
||||
# =================== The following code will be executed inside Docker container ===================
|
||||
export TZ=UTC
|
||||
@ -159,7 +153,7 @@ export DOCKER_IMAGE="$DOCKER_IMAGE"
|
||||
|
||||
|
||||
export USE_GOLD_LINKER="${USE_GOLD_LINKER}"
|
||||
export USE_GLOO_WITH_OPENSSL="${USE_GLOO_WITH_OPENSSL}"
|
||||
export USE_GLOO_WITH_OPENSSL="ON"
|
||||
# =================== The above code will be executed inside Docker container ===================
|
||||
EOL
|
||||
|
||||
|
||||
@ -13,7 +13,6 @@ export VC_YEAR=2019
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
export VC_YEAR=2022
export USE_SCCACHE=0
export XPU_VERSION=2025.0
fi

echo "Free space on filesystem before build:"

@ -8,7 +8,6 @@ export VC_YEAR=2019

if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
export VC_YEAR=2022
export XPU_VERSION=2025.0
fi

pushd "$BUILDER_ROOT"

@ -101,17 +101,9 @@ SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
StatementMacros:
- C10_DEFINE_bool
- C10_DEFINE_int
- C10_DEFINE_int32
- C10_DEFINE_int64
- C10_DEFINE_string
- DEFINE_BINARY
- PyObject_HEAD
- PyObject_VAR_HEAD
- PyException_HEAD
- TORCH_DECLARE_bool

TabWidth: 8
UseTab: Never
---
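`StatementMacros` tells clang-format that each listed macro (e.g. `C10_DEFINE_bool`) expands to a complete statement, so the formatter does not try to fold the following line into the macro invocation. A file can be checked against this config with standard clang-format flags; the target path here is illustrative:

```
# --style=file picks up the repo's .clang-format; --dry-run with --Werror
# reports formatting violations as errors instead of rewriting the file.
clang-format --style=file --dry-run --Werror path/to/file.cpp
```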
@ -29,19 +29,19 @@ cppcoreguidelines-*,
-cppcoreguidelines-pro-type-static-cast-downcast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-pro-type-vararg,
-cppcoreguidelines-special-member-functions,
-cppcoreguidelines-non-private-member-variables-in-classes,
-facebook-hte-RelativeInclude,
hicpp-exception-baseclass,
hicpp-avoid-goto,
misc-*,
-misc-confusable-identifiers,
-misc-const-correctness,
-misc-include-cleaner,
-misc-use-anonymous-namespace,
-misc-unused-parameters,
-misc-no-recursion,
-misc-non-private-member-variables-in-classes,
-misc-unused-using-decls,
-misc-confusable-identifiers,
modernize-*,
-modernize-macro-to-enum,
-modernize-return-braced-init-list,
@ -63,7 +63,5 @@ readability-string-compare,
HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
WarningsAsErrors: '*'
CheckOptions:
cppcoreguidelines-special-member-functions.AllowSoleDefaultDtor: true
cppcoreguidelines-special-member-functions.AllowImplicitlyDeletedCopyOrMove: true
misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h;IListRef.h'
misc-header-include-cycle.IgnoredFilesList: 'format.h;ivalue.h;custom_class.h;Dict.h;List.h'
...
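For reference, `HeaderFilterRegex` restricts reported diagnostics to headers under `aten/`, `c10/`, and `torch/`, and `WarningsAsErrors: '*'` makes every enabled check fatal. A sketch of a manual invocation against this config, assuming a build directory containing `compile_commands.json`:

```
# clang-tidy discovers .clang-tidy by walking up from the source file;
# -p points at a build dir containing compile_commands.json.
clang-tidy -p build --header-filter='^(aten/|c10/|torch/).*$' torch/csrc/Module.cpp
```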
1 .gitattributes vendored
@ -5,4 +5,3 @@
.github/scripts/gql_mocks.json linguist-generated=true
third_party/LICENSES_BUNDLED.txt linguist-generated=true
tools/build/bazel/requirements.txt linguist-generated=true
torch/csrc/utils/generated_serialization_types.h linguist-generated=true
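The `linguist-generated=true` attribute collapses these files in GitHub diffs and excludes them from language statistics; whether a path picked up the attribute can be verified locally:

```
# Ask git which value .gitattributes assigns to the path.
git check-attr linguist-generated -- torch/csrc/utils/generated_serialization_types.h
# torch/csrc/utils/generated_serialization_types.h: linguist-generated: true
```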
3 .github/ISSUE_TEMPLATE/ci-sev.md vendored
@ -5,7 +5,8 @@ about: Tracking incidents for PyTorch's CI infra.

> NOTE: Remember to label this issue with "`ci: sev`"

<!-- Add the `merge blocking` label to this PR to prevent PRs from being merged while this issue is open -->
<!-- uncomment the below line if you don't want this SEV to block merges -->
<!-- **MERGE BLOCKING** -->

## Current Status
*Status could be: preemptive, ongoing, mitigated, closed. Also tell people if they need to take action to fix it (i.e. rebase)*.
28 .github/ISSUE_TEMPLATE/pt2-bug-report.yml vendored
@ -14,7 +14,7 @@ body:

- Ensure rtol/atol are at default tolerances

- Don't compare indices of max/min etc, because that avoids the above requirement
- Dont compare indices of max/min etc, because that avoids the above requirement

- If comparing eager and torch.compile at fp16/bf16, you should use fp32 as baseline

@ -25,14 +25,6 @@ body:
label: 🐛 Describe the bug
description: |
Please provide a clear and concise description of what the bug is.

See https://pytorch.org/docs/main/torch.compiler_troubleshooting.html#reporting-issues
for guidance on what to additionally include. In particular, consider including:

- The `tlparse` for your program
- Ablation - which `torch.compile` backend/mode/settings cause the bug
- A minimal reproducer

placeholder: |
A clear and concise description of what the bug is.
validations:
@ -47,7 +39,25 @@ body:
Error...
validations:
required: false
- type: textarea
attributes:
label: Minified repro
description: |
Please run the minifier on your example and paste the minified code below
Learn more here https://pytorch.org/docs/main/torch.compiler_troubleshooting.html
placeholder: |
env TORCHDYNAMO_REPRO_AFTER="aot" python your_model.py
or
env TORCHDYNAMO_REPRO_AFTER="dynamo" python your_model.py

import torch
...

# torch version: 2.0.....

class Repro(torch.nn.Module)
validations:
required: false
- type: textarea
attributes:
label: Versions
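For completeness, the `tlparse` report the template asks for is collected in two steps; the paths and script name below are illustrative, and `tlparse` is assumed to be installed separately (e.g. via pip):

```
# 1) Run the failing program with structured trace logging enabled.
TORCH_TRACE=/tmp/tracedir python repro.py

# 2) Render the collected logs into an HTML report.
tlparse /tmp/tracedir
```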
2 .github/actions/build-android/action.yml vendored
@ -42,7 +42,6 @@ runs:
PR_NUMBER: ${{ github.event.pull_request.number }}
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
SCCACHE_REGION: us-east-1
DOCKER_IMAGE: ${{ inputs.docker-image }}
MATRIX_ARCH: ${{ inputs.arch }}
run: |
@ -57,7 +56,6 @@ runs:
-e SHA1 \
-e BRANCH \
-e SCCACHE_BUCKET \
-e SCCACHE_REGION \
-e SKIP_SCCACHE_INITIALIZATION=1 \
--env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
--security-opt seccomp=unconfined \
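In the `docker run` flags above, `-e SCCACHE_BUCKET` with no value forwards the variable from the runner's environment, while `-e SKIP_SCCACHE_INITIALIZATION=1` sets one explicitly; dropping `-e SCCACHE_REGION` simply stops forwarding it. A minimal sketch of the two forms, with an illustrative image:

```
export SCCACHE_BUCKET=ossci-compiler-cache-circleci-v2

# Pass-through form inherits the host value; NAME=value sets it directly.
docker run --rm -e SCCACHE_BUCKET -e SKIP_SCCACHE_INITIALIZATION=1 \
  alpine:3 env | grep SCCACHE
```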
@ -26,7 +26,7 @@ runs:

- name: Download PyTorch Build Artifacts from GHA
if: ${{ inputs.use-gha }}
uses: actions/download-artifact@v4
uses: actions/download-artifact@v3
with:
name: ${{ inputs.name }}

@ -18,7 +18,7 @@ runs:

- name: Download TD Artifacts from GHA
if: inputs.use-gha
uses: actions/download-artifact@v4
uses: actions/download-artifact@v3
with:
name: td_results.json
12 .github/actions/linux-test/action.yml vendored
@ -47,14 +47,7 @@ inputs:
GITHUB_TOKEN:
description: GitHub token
required: true
disable-monitor:
description: |
[Experimental] Disable utilization monitoring for tests.
Currently, by default we disable the monitor job and only look for specific tests,
since we are investigating the behaviour of the monitor script with different tests.
required: false
type: boolean
default: true

#env:
# GIT_DEFAULT_BRANCH: ${{ inputs.default_branch }}

@ -122,7 +115,6 @@ runs:

- name: Start monitoring script
id: monitor-script
if: ${{ !inputs.disable-monitor }}
shell: bash
continue-on-error: true
run: |
@ -297,7 +289,7 @@ runs:
cat test/**/*_toprint.log || true

- name: Stop monitoring script
if: ${{ always() && steps.monitor-script.outputs.monitor-script-pid }}
if: always() && steps.monitor-script.outputs.monitor-script-pid
shell: bash
continue-on-error: true
env:
@ -26,7 +26,7 @@ runs:
retry_wait_seconds: 30
command: |
set -eu
python3 -m pip install boto3==1.35.42
python3 -m pip install boto3==1.19.12

- name: Download the cache
shell: bash

@ -33,7 +33,7 @@ runs:
retry_wait_seconds: 30
command: |
set -eu
python3 -m pip install boto3==1.35.42
python3 -m pip install boto3==1.19.12

- name: Upload the cache
shell: bash
2 .github/actions/setup-linux/action.yml vendored
@ -20,7 +20,7 @@ runs:
elif [[ $runner_name_str == *"gcp"* ]]; then
echo "Runner is from Google Cloud Platform, No info on ec2 metadata"
else
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
fi
}
echo "ami-id: $(get_ec2_metadata ami-id)"

2 .github/actions/setup-win/action.yml vendored
@ -18,7 +18,7 @@ runs:
# Pulled from instance metadata endpoint for EC2
# see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instancedata-data-retrieval.html
category=$1
curl -H "X-aws-ec2-metadata-token: $(curl -s -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 30")" -fsSL "http://169.254.169.254/latest/meta-data/${category}"
curl -fsSL "http://169.254.169.254/latest/meta-data/${category}"
}
echo "ami-id: $(get_ec2_metadata ami-id)"
echo "instance-id: $(get_ec2_metadata instance-id)"
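The change above moves the metadata queries from IMDSv1 (a plain GET) to IMDSv2's session-token flow. Spelled out in two steps rather than nested in one line, it looks like this:

```
# Step 1: obtain a short-lived IMDSv2 session token.
TOKEN=$(curl -s -X PUT "http://169.254.169.254/latest/api/token" \
  -H "X-aws-ec2-metadata-token-ttl-seconds: 30")

# Step 2: present the token on every metadata read.
curl -H "X-aws-ec2-metadata-token: $TOKEN" \
  -fsSL "http://169.254.169.254/latest/meta-data/instance-id"
```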
39 .github/actions/upload-sccache-stats/action.yml vendored
@ -1,39 +0,0 @@
# Upload sccache stats to artifacts, and also as benchmark data when on an aws
# linux or windows machine. Does not currently handle mac builds
name: Upload sccache stats

description: Upload sccache stats to artifacts

inputs:
github-token:
description: GITHUB_TOKEN
required: true
build-time:
description: Build time in seconds

runs:
using: composite
steps:
- name: Upload sccache to s3
uses: seemethere/upload-artifact-s3@v5
with:
s3-prefix: |
${{ github.repository }}/${{ github.run_id }}/${{ github.run_attempt }}/artifact
retention-days: 14
if-no-files-found: warn
path: sccache-stats-*.json

- name: Format sccache stats
shell: bash
run: |
python3 -m tools.stats.sccache_stats_to_benchmark_format
env:
BUILD_TIME: ${{ inputs.build-time }}

- name: Upload sccache stats as benchmark
uses: pytorch/test-infra/.github/actions/upload-benchmark-results@main
with:
benchmark-results-dir: test/test-reports
dry-run: false
schema-version: v3
github-token: ${{ inputs.github-token }}
20 .github/actions/upload-test-artifacts/action.yml vendored
@ -28,7 +28,7 @@ runs:
run: |
# Remove any previous test jsons if they exist
rm -f test-jsons-*.zip
zip -r "test-jsons-${FILE_SUFFIX}.zip" test/test-reports -i '*.json'
zip -r "test-jsons-${FILE_SUFFIX}.zip" test -i '*.json'

- name: Zip test reports for upload
if: runner.os != 'Windows' && !inputs.use-gha
@ -38,7 +38,7 @@ runs:
run: |
# Remove any previous test reports if they exist
rm -f test-reports-*.zip
zip -r "test-reports-${FILE_SUFFIX}.zip" test/test-reports -i '*.xml' -i '*.csv'
zip -r "test-reports-${FILE_SUFFIX}.zip" test -i '*.xml' -i '*.csv'

- name: Zip usage log for upload
if: runner.os != 'Windows' && !inputs.use-gha
@ -53,8 +53,8 @@ runs:
if [ -f 'usage_log.txt' ]; then
zip "logs-${FILE_SUFFIX}.zip" 'usage_log.txt'
fi
if find "test/test-reports" -name "*.log" 2>/dev/null | grep -q .; then
zip -r "logs-${FILE_SUFFIX}.zip" test/test-reports -i '*.log'
if ls test/**/*.log 1> /dev/null 2>&1; then
zip -r "logs-${FILE_SUFFIX}.zip" test -i '*.log'
fi

- name: Zip debugging artifacts for upload
@ -77,7 +77,7 @@ runs:
FILE_SUFFIX: ${{ inputs.file-suffix }}
run: |
# -ir => recursive include all files in pattern
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\test-reports\*.json'
7z a "test-jsons-$Env:FILE_SUFFIX.zip" -ir'!test\*.json'

- name: Zip test reports for upload
if: runner.os == 'Windows' && !inputs.use-gha
@ -86,7 +86,7 @@ runs:
FILE_SUFFIX: ${{ inputs.file-suffix }}
run: |
# -ir => recursive include all files in pattern
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\test-reports\*.xml' -ir'!test\test-reports\*.csv'
7z a "test-reports-$Env:FILE_SUFFIX.zip" -ir'!test\*.xml' -ir'!test\*.csv'

- name: Zip usage log for upload
if: runner.os == 'Windows' && !inputs.use-gha
@ -96,7 +96,7 @@ runs:
FILE_SUFFIX: ${{ inputs.file-suffix }}
run: |
# -ir => recursive include all files in pattern
7z a "logs-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\test-reports\*.log'
7z a "logs-$Env:FILE_SUFFIX.zip" 'usage_log.txt' -ir'!test\*.log'

# S3 upload
- name: Store Test Downloaded JSONs on S3
@ -147,7 +147,7 @@ runs:

# GHA upload
- name: Store Test Downloaded JSONs on Github
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
if: inputs.use-gha
continue-on-error: true
with:
@ -158,7 +158,7 @@ runs:
path: test/**/*.json

- name: Store Test Reports on Github
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
if: inputs.use-gha
continue-on-error: true
with:
@ -172,7 +172,7 @@ runs:
test/**/*.csv

- name: Store Usage Logs on Github
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
if: inputs.use-gha
continue-on-error: true
with:
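The two zip forms diffed above differ in which tree is walked, not just in the pattern: `zip -r <archive> <root> -i '<glob>'` recurses under `<root>` and keeps only matching paths. A neutral sketch of both, with an illustrative archive name:

```
# Walks all of test/, keeping any *.json found anywhere beneath it.
zip -r demo.zip test -i '*.json'

# Walks only test/test-reports, so JSONs elsewhere under test/ are excluded.
zip -r demo.zip test/test-reports -i '*.json'
```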
2 .github/ci_commit_pins/audio.txt vendored
@ -1 +1 @@
332760d4b300f00a0d862e3cfe1495db3b1a14f9
3f0569939c4369bec943fc27d1c9d8dfbc828c26

1 .github/ci_commit_pins/torchao.txt vendored
@ -1 +0,0 @@
51c87b6ead6b7e098ada95d6a7609ee873b854cf

2 .github/ci_commit_pins/torchbench.txt vendored
@ -1 +1 @@
766a5e3a189384659fd35a68c3b17b88c761aaac
23512dbebd44a11eb84afbf53c3c071dd105297e

2 .github/ci_commit_pins/xla.txt vendored
@ -1 +1 @@
2ec22641e390cda25ec7c61fcbce07507727d584
2eb4a60ed14a38260b85b0c765161f0ce45be6d1
9 .github/labeler.yml vendored
@ -35,11 +35,8 @@
- torch/distributed/_tensor/**
- torch/distributed/fsdp/**
- torch/csrc/inductor/**
- torch/csrc/dynamo/**
- test/cpp/aoti_abi_check/**
- test/cpp/aoti_inference/**
- test/inductor/**
- test/dynamo/**

"module: cpu":
- aten/src/ATen/cpu/**
@ -101,9 +98,3 @@
"module: distributed_checkpoint":
- torch/distributed/checkpoint/**
- test/distributed/checkpoint/**

"module: compiled autograd":
- torch/csrc/dynamo/python_compiled_autograd.cpp
- torch/csrc/dynamo/compiled_autograd.h
- torch/_dynamo/compiled_autograd.py
- torch/inductor/test_compiled_autograd.py
251 .github/lf-canary-scale-config.yml vendored Normal file
@ -0,0 +1,251 @@

# This file is generated by .github/scripts/validate_scale_config.py in test-infra
# It defines runner types that will be provisioned by LF Self-hosted runners

# scale-config.yml:
#   Powers what instance types are available for GHA auto-scaled
#   runners. Runners listed here will be available as self hosted
#   runners, configuration is directly pulled from the main branch.
#
#
# NOTES:
#  - Linux runners are by default non-ephemeral to reduce the amount of CreateInstances calls
#    to avoid RequestLimitExceeded issues
#  - When updating this file, run the following command to validate the YAML and to generate
#    corresponding versions of scale-config for the pytorch/pytorch repo and merge the
#    pytorch/pytorch changes before merging these changes.
#    `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]`
#
# TODO: Add some documentation on how the auto-scaling works
#
# NOTE: Default values,
#
# runner_types:
#   runner_label:
#     instance_type: m4.large
#     os: linux
#     max_available: 20
#     disk_size: 50
#     is_ephemeral: true

runner_types:
  lf.c.linux.12xlarge:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.10xlarge.avx2:
    disk_size: 200
    instance_type: m4.10xlarge
    is_ephemeral: false
    max_available: 450
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.24xl.spr-metal:
    disk_size: 200
    instance_type: c7i.metal-24xl
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.16xlarge.spr:
    disk_size: 200
    instance_type: c7i.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.9xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.9xlarge
    is_ephemeral: true
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
    variants:
      am2:
        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.c.linux.12xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: true
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.16xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.24xlarge:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: false
    max_available: 500
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.24xlarge.ephemeral:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.2xlarge:
    disk_size: 150
    instance_type: c5.2xlarge
    is_ephemeral: false
    max_available: 3120
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.4xlarge:
    disk_size: 150
    instance_type: c5.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.8xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.8xlarge
    is_ephemeral: false
    max_available: 400
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g4dn.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.12xlarge
    is_ephemeral: false
    max_available: 250
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g4dn.metal.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.metal
    is_ephemeral: false
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g5.48xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.48xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g5.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.12xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g5.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 2400
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.g6.4xlarge.experimental.nvidia.gpu:
    disk_size: 150
    instance_type: g6.4xlarge
    is_ephemeral: false
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.large:
    max_available: 1200
    disk_size: 15
    instance_type: c5.large
    is_ephemeral: false
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.c.linux.arm64.2xlarge:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.c.linux.arm64.m7g.4xlarge:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.c.linux.arm64.2xlarge.ephemeral:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.c.linux.arm64.m7g.4xlarge.ephemeral:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.c.linux.arm64.m7g.metal:
    disk_size: 256
    instance_type: m7g.metal
    is_ephemeral: false
    max_available: 100
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.c.windows.g4dn.xlarge:
    disk_size: 256
    instance_type: g4dn.xlarge
    is_ephemeral: true
    max_available: 100
    os: windows
  lf.c.windows.g4dn.xlarge.nonephemeral:
    disk_size: 256
    instance_type: g4dn.xlarge
    is_ephemeral: false
    max_available: 100
    os: windows
  lf.c.windows.4xlarge:
    disk_size: 256
    instance_type: c5d.4xlarge
    is_ephemeral: true
    max_available: 420
    os: windows
  lf.c.windows.4xlarge.nonephemeral:
    disk_size: 256
    instance_type: c5d.4xlarge
    is_ephemeral: false
    max_available: 420
    os: windows
  lf.c.windows.8xlarge.nvidia.gpu:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: true
    max_available: 300
    os: windows
  lf.c.windows.8xlarge.nvidia.gpu.nonephemeral:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: false
    max_available: 150
    os: windows
  lf.c.windows.g5.4xlarge.nvidia.gpu:
    disk_size: 256
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 250
    os: windows
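Each label under `runner_types` maps to an EC2 shape plus pool limits (`max_available`, `disk_size`, `is_ephemeral`, optional `ami` and `variants`). One can spot-check a single field with the `yq` CLI, assuming mikefarah yq v4 is installed:

```
# Hypothetical sanity check: the canary c5.2xlarge pool ceiling.
yq '.runner_types."lf.c.linux.2xlarge".max_available' .github/lf-canary-scale-config.yml
# 3120
```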
251 .github/lf-scale-config.yml vendored Normal file
@ -0,0 +1,251 @@

# This file is generated by .github/scripts/validate_scale_config.py in test-infra
# It defines runner types that will be provisioned by LF Self-hosted runners

# scale-config.yml:
#   Powers what instance types are available for GHA auto-scaled
#   runners. Runners listed here will be available as self hosted
#   runners, configuration is directly pulled from the main branch.
#
#
# NOTES:
#  - Linux runners are by default non-ephemeral to reduce the amount of CreateInstances calls
#    to avoid RequestLimitExceeded issues
#  - When updating this file, run the following command to validate the YAML and to generate
#    corresponding versions of scale-config for the pytorch/pytorch repo and merge the
#    pytorch/pytorch changes before merging these changes.
#    `python .github/scripts/validate_scale_config.py --test-infra-repo-root [path_to_test-infra_root] --pytorch-repo-root [path_to_pytorch_root]`
#
# TODO: Add some documentation on how the auto-scaling works
#
# NOTE: Default values,
#
# runner_types:
#   runner_label:
#     instance_type: m4.large
#     os: linux
#     max_available: 20
#     disk_size: 50
#     is_ephemeral: true

runner_types:
  lf.linux.12xlarge:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.10xlarge.avx2:
    disk_size: 200
    instance_type: m4.10xlarge
    is_ephemeral: false
    max_available: 450
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xl.spr-metal:
    disk_size: 200
    instance_type: c7i.metal-24xl
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.16xlarge.spr:
    disk_size: 200
    instance_type: c7i.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.9xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.9xlarge
    is_ephemeral: true
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
    variants:
      am2:
        ami: amzn2-ami-hvm-2.0.20240306.2-x86_64-ebs
  lf.linux.12xlarge.ephemeral:
    disk_size: 200
    instance_type: c5.12xlarge
    is_ephemeral: true
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.16xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.16xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xlarge:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: false
    max_available: 500
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.24xlarge.ephemeral:
    disk_size: 150
    instance_type: c5.24xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.2xlarge:
    disk_size: 150
    instance_type: c5.2xlarge
    is_ephemeral: false
    max_available: 3120
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.4xlarge:
    disk_size: 150
    instance_type: c5.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.4xlarge
    is_ephemeral: false
    max_available: 1000
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.8xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g3.8xlarge
    is_ephemeral: false
    max_available: 400
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g4dn.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.12xlarge
    is_ephemeral: false
    max_available: 250
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g4dn.metal.nvidia.gpu:
    disk_size: 150
    instance_type: g4dn.metal
    is_ephemeral: false
    max_available: 300
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.48xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.48xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.12xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.12xlarge
    is_ephemeral: false
    max_available: 150
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g5.4xlarge.nvidia.gpu:
    disk_size: 150
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 2400
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.g6.4xlarge.experimental.nvidia.gpu:
    disk_size: 150
    instance_type: g6.4xlarge
    is_ephemeral: false
    max_available: 50
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.large:
    max_available: 1200
    disk_size: 15
    instance_type: c5.large
    is_ephemeral: false
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-x86_64
  lf.linux.arm64.2xlarge:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.4xlarge:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: false
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.2xlarge.ephemeral:
    disk_size: 256
    instance_type: t4g.2xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.4xlarge.ephemeral:
    disk_size: 256
    instance_type: m7g.4xlarge
    is_ephemeral: true
    max_available: 200
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.linux.arm64.m7g.metal:
    disk_size: 256
    instance_type: m7g.metal
    is_ephemeral: false
    max_available: 100
    os: linux
    ami: al2023-ami-2023.5.202*-kernel-6.1-arm64
  lf.windows.g4dn.xlarge:
    disk_size: 256
    instance_type: g4dn.xlarge
    is_ephemeral: true
    max_available: 100
    os: windows
  lf.windows.g4dn.xlarge.nonephemeral:
    disk_size: 256
    instance_type: g4dn.xlarge
    is_ephemeral: false
    max_available: 100
    os: windows
  lf.windows.4xlarge:
    disk_size: 256
    instance_type: c5d.4xlarge
    is_ephemeral: true
    max_available: 420
    os: windows
  lf.windows.4xlarge.nonephemeral:
    disk_size: 256
    instance_type: c5d.4xlarge
    is_ephemeral: false
    max_available: 420
    os: windows
  lf.windows.8xlarge.nvidia.gpu:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: true
    max_available: 300
    os: windows
  lf.windows.8xlarge.nvidia.gpu.nonephemeral:
    disk_size: 256
    instance_type: p3.2xlarge
    is_ephemeral: false
    max_available: 150
    os: windows
  lf.windows.g5.4xlarge.nvidia.gpu:
    disk_size: 256
    instance_type: g5.4xlarge
    is_ephemeral: false
    max_available: 250
    os: windows
Some files were not shown because too many files have changed in this diff.