mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-24 23:54:56 +08:00
Compare commits
1 Commits
v2.9.0-rc6
...
codex-test
| Author | SHA1 | Date | |
|---|---|---|---|
| bc67bce2e5 |
@ -1,15 +0,0 @@
|
|||||||
version: 1
|
|
||||||
paths:
|
|
||||||
include:
|
|
||||||
- "**/*.py"
|
|
||||||
exclude:
|
|
||||||
- ".*"
|
|
||||||
- ".*/**"
|
|
||||||
- "**/.*/**"
|
|
||||||
- "**/.*"
|
|
||||||
- "**/_*/**"
|
|
||||||
- "**/_*.py"
|
|
||||||
- "**/test/**"
|
|
||||||
- "**/benchmarks/**"
|
|
||||||
- "**/test_*.py"
|
|
||||||
- "**/*_test.py"
|
|
||||||
@ -3,20 +3,8 @@ set -eux -o pipefail
|
|||||||
|
|
||||||
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
|
||||||
|
|
||||||
# Set CUDA architecture lists to match x86 build_cuda.sh
|
if [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then
|
||||||
if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
|
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0"
|
|
||||||
elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
|
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
|
||||||
elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
|
|
||||||
export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Compress the fatbin with -compress-mode=size for CUDA 13
|
|
||||||
if [[ "$DESIRED_CUDA" == *"13"* ]]; then
|
|
||||||
export TORCH_NVCC_FLAGS="-compress-mode=size"
|
|
||||||
# Bundle ptxas into the cu13 wheel, see https://github.com/pytorch/pytorch/issues/163801
|
|
||||||
export BUILD_BUNDLE_PTXAS=1
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
|
SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
|
||||||
@ -30,7 +18,7 @@ cd /
|
|||||||
# on the mounted pytorch repo
|
# on the mounted pytorch repo
|
||||||
git config --global --add safe.directory /pytorch
|
git config --global --add safe.directory /pytorch
|
||||||
pip install -r /pytorch/requirements.txt
|
pip install -r /pytorch/requirements.txt
|
||||||
pip install auditwheel==6.2.0 wheel
|
pip install auditwheel==6.2.0
|
||||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||||
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||||
@ -38,16 +26,6 @@ if [ "$DESIRED_CUDA" = "cpu" ]; then
|
|||||||
else
|
else
|
||||||
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
||||||
export USE_SYSTEM_NCCL=1
|
export USE_SYSTEM_NCCL=1
|
||||||
|
|
||||||
# Check if we should use NVIDIA libs from PyPI (similar to x86 build_cuda.sh logic)
|
|
||||||
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
|
|
||||||
echo "Bundling CUDA libraries with wheel for aarch64."
|
|
||||||
else
|
|
||||||
echo "Using nvidia libs from pypi for aarch64."
|
|
||||||
echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
|
|
||||||
export USE_NVIDIA_PYPI_LIBS=1
|
|
||||||
fi
|
|
||||||
|
|
||||||
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
#USE_PRIORITIZED_TEXT_FOR_LD for enable linker script optimization https://github.com/pytorch/pytorch/pull/121975/files
|
||||||
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -69,137 +69,29 @@ def replace_tag(filename) -> None:
|
|||||||
f.writelines(lines)
|
f.writelines(lines)
|
||||||
|
|
||||||
|
|
||||||
def patch_library_rpath(
|
|
||||||
folder: str,
|
|
||||||
lib_name: str,
|
|
||||||
use_nvidia_pypi_libs: bool = False,
|
|
||||||
desired_cuda: str = "",
|
|
||||||
) -> None:
|
|
||||||
"""Apply patchelf to set RPATH for a library in torch/lib"""
|
|
||||||
lib_path = f"{folder}/tmp/torch/lib/{lib_name}"
|
|
||||||
|
|
||||||
if use_nvidia_pypi_libs:
|
|
||||||
# For PyPI NVIDIA libraries, construct CUDA RPATH
|
|
||||||
cuda_rpaths = [
|
|
||||||
"$ORIGIN/../../nvidia/cudnn/lib",
|
|
||||||
"$ORIGIN/../../nvidia/nvshmem/lib",
|
|
||||||
"$ORIGIN/../../nvidia/nccl/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cusparselt/lib",
|
|
||||||
]
|
|
||||||
|
|
||||||
if "130" in desired_cuda:
|
|
||||||
cuda_rpaths.append("$ORIGIN/../../nvidia/cu13/lib")
|
|
||||||
else:
|
|
||||||
cuda_rpaths.extend(
|
|
||||||
[
|
|
||||||
"$ORIGIN/../../nvidia/cublas/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cuda_cupti/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cuda_nvrtc/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cuda_runtime/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cufft/lib",
|
|
||||||
"$ORIGIN/../../nvidia/curand/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cusolver/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cusparse/lib",
|
|
||||||
"$ORIGIN/../../nvidia/nvtx/lib",
|
|
||||||
"$ORIGIN/../../nvidia/cufile/lib",
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
# Add $ORIGIN for local torch libs
|
|
||||||
rpath = ":".join(cuda_rpaths) + ":$ORIGIN"
|
|
||||||
else:
|
|
||||||
# For bundled libraries, just use $ORIGIN
|
|
||||||
rpath = "$ORIGIN"
|
|
||||||
|
|
||||||
if os.path.exists(lib_path):
|
|
||||||
os.system(
|
|
||||||
f"cd {folder}/tmp/torch/lib/; "
|
|
||||||
f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def copy_and_patch_library(
|
|
||||||
src_path: str,
|
|
||||||
folder: str,
|
|
||||||
use_nvidia_pypi_libs: bool = False,
|
|
||||||
desired_cuda: str = "",
|
|
||||||
) -> None:
|
|
||||||
"""Copy a library to torch/lib and patch its RPATH"""
|
|
||||||
if os.path.exists(src_path):
|
|
||||||
lib_name = os.path.basename(src_path)
|
|
||||||
shutil.copy2(src_path, f"{folder}/tmp/torch/lib/{lib_name}")
|
|
||||||
patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
|
|
||||||
|
|
||||||
|
|
||||||
def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
||||||
"""
|
"""
|
||||||
Package the cuda wheel libraries
|
Package the cuda wheel libraries
|
||||||
"""
|
"""
|
||||||
folder = os.path.dirname(wheel_path)
|
folder = os.path.dirname(wheel_path)
|
||||||
|
wheelname = os.path.basename(wheel_path)
|
||||||
os.mkdir(f"{folder}/tmp")
|
os.mkdir(f"{folder}/tmp")
|
||||||
os.system(f"unzip {wheel_path} -d {folder}/tmp")
|
os.system(f"unzip {wheel_path} -d {folder}/tmp")
|
||||||
# Delete original wheel since it will be repackaged
|
libs_to_copy = [
|
||||||
os.system(f"rm {wheel_path}")
|
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
|
||||||
|
|
||||||
# Check if we should use PyPI NVIDIA libraries or bundle system libraries
|
|
||||||
use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
|
|
||||||
|
|
||||||
if use_nvidia_pypi_libs:
|
|
||||||
print("Using nvidia libs from pypi - skipping CUDA library bundling")
|
|
||||||
# For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages
|
|
||||||
# We only need to bundle non-NVIDIA libraries
|
|
||||||
minimal_libs_to_copy = [
|
|
||||||
"/lib64/libgomp.so.1",
|
|
||||||
"/usr/lib64/libgfortran.so.5",
|
|
||||||
"/acl/build/libarm_compute.so",
|
|
||||||
"/acl/build/libarm_compute_graph.so",
|
|
||||||
"/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_lapack_core.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_blas_core.so.0",
|
|
||||||
]
|
|
||||||
|
|
||||||
# Copy minimal libraries to unzipped_folder/torch/lib
|
|
||||||
for lib_path in minimal_libs_to_copy:
|
|
||||||
copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
|
|
||||||
|
|
||||||
# Patch torch libraries used for searching libraries
|
|
||||||
torch_libs_to_patch = [
|
|
||||||
"libtorch.so",
|
|
||||||
"libtorch_cpu.so",
|
|
||||||
"libtorch_cuda.so",
|
|
||||||
"libtorch_cuda_linalg.so",
|
|
||||||
"libtorch_global_deps.so",
|
|
||||||
"libtorch_python.so",
|
|
||||||
"libtorch_nvshmem.so",
|
|
||||||
"libc10.so",
|
|
||||||
"libc10_cuda.so",
|
|
||||||
"libcaffe2_nvrtc.so",
|
|
||||||
"libshm.so",
|
|
||||||
]
|
|
||||||
for lib_name in torch_libs_to_patch:
|
|
||||||
patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
|
|
||||||
else:
|
|
||||||
print("Bundling CUDA libraries with wheel")
|
|
||||||
# Original logic for bundling system CUDA libraries
|
|
||||||
# Common libraries for all CUDA versions
|
|
||||||
common_libs = [
|
|
||||||
# Non-NVIDIA system libraries
|
|
||||||
"/lib64/libgomp.so.1",
|
|
||||||
"/usr/lib64/libgfortran.so.5",
|
|
||||||
"/acl/build/libarm_compute.so",
|
|
||||||
"/acl/build/libarm_compute_graph.so",
|
|
||||||
# Common CUDA libraries (same for all versions)
|
|
||||||
"/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_lapack_core.so.0",
|
|
||||||
"/usr/local/lib/libnvpl_blas_core.so.0",
|
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
|
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
|
||||||
"/usr/local/cuda/lib64/libcudnn.so.9",
|
"/usr/local/cuda/lib64/libcudnn.so.9",
|
||||||
|
"/usr/local/cuda/lib64/libcublas.so.12",
|
||||||
|
"/usr/local/cuda/lib64/libcublasLt.so.12",
|
||||||
|
"/usr/local/cuda/lib64/libcudart.so.12",
|
||||||
|
"/usr/local/cuda/lib64/libcufft.so.11",
|
||||||
|
"/usr/local/cuda/lib64/libcusparse.so.12",
|
||||||
"/usr/local/cuda/lib64/libcusparseLt.so.0",
|
"/usr/local/cuda/lib64/libcusparseLt.so.0",
|
||||||
|
"/usr/local/cuda/lib64/libcusolver.so.11",
|
||||||
"/usr/local/cuda/lib64/libcurand.so.10",
|
"/usr/local/cuda/lib64/libcurand.so.10",
|
||||||
"/usr/local/cuda/lib64/libnccl.so.2",
|
"/usr/local/cuda/lib64/libnccl.so.2",
|
||||||
"/usr/local/cuda/lib64/libnvshmem_host.so.3",
|
"/usr/local/cuda/lib64/libnvJitLink.so.12",
|
||||||
|
"/usr/local/cuda/lib64/libnvrtc.so.12",
|
||||||
"/usr/local/cuda/lib64/libcudnn_adv.so.9",
|
"/usr/local/cuda/lib64/libcudnn_adv.so.9",
|
||||||
"/usr/local/cuda/lib64/libcudnn_cnn.so.9",
|
"/usr/local/cuda/lib64/libcudnn_cnn.so.9",
|
||||||
"/usr/local/cuda/lib64/libcudnn_graph.so.9",
|
"/usr/local/cuda/lib64/libcudnn_graph.so.9",
|
||||||
@ -207,48 +99,31 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
|||||||
"/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
|
"/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
|
||||||
"/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
|
"/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
|
||||||
"/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
|
"/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
|
||||||
|
"/lib64/libgomp.so.1",
|
||||||
|
"/usr/lib64/libgfortran.so.5",
|
||||||
|
"/acl/build/libarm_compute.so",
|
||||||
|
"/acl/build/libarm_compute_graph.so",
|
||||||
|
"/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
|
||||||
|
"/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
|
||||||
|
"/usr/local/lib/libnvpl_lapack_core.so.0",
|
||||||
|
"/usr/local/lib/libnvpl_blas_core.so.0",
|
||||||
|
]
|
||||||
|
|
||||||
|
if "129" in desired_cuda:
|
||||||
|
libs_to_copy += [
|
||||||
|
"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.9",
|
||||||
"/usr/local/cuda/lib64/libcufile.so.0",
|
"/usr/local/cuda/lib64/libcufile.so.0",
|
||||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
"/usr/local/cuda/lib64/libcufile_rdma.so.1",
|
||||||
"/usr/local/cuda/lib64/libcusparse.so.12",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
# CUDA version-specific libraries
|
# Copy libraries to unzipped_folder/a/lib
|
||||||
if "13" in desired_cuda:
|
|
||||||
minor_version = desired_cuda[-1]
|
|
||||||
version_specific_libs = [
|
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
|
|
||||||
"/usr/local/cuda/lib64/libcublas.so.13",
|
|
||||||
"/usr/local/cuda/lib64/libcublasLt.so.13",
|
|
||||||
"/usr/local/cuda/lib64/libcudart.so.13",
|
|
||||||
"/usr/local/cuda/lib64/libcufft.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libcusolver.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libnvJitLink.so.13",
|
|
||||||
"/usr/local/cuda/lib64/libnvrtc.so.13",
|
|
||||||
f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
|
|
||||||
]
|
|
||||||
elif "12" in desired_cuda:
|
|
||||||
# Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
|
|
||||||
minor_version = desired_cuda[-1]
|
|
||||||
version_specific_libs = [
|
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libcublas.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libcublasLt.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libcudart.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libcufft.so.11",
|
|
||||||
"/usr/local/cuda/lib64/libcusolver.so.11",
|
|
||||||
"/usr/local/cuda/lib64/libnvJitLink.so.12",
|
|
||||||
"/usr/local/cuda/lib64/libnvrtc.so.12",
|
|
||||||
f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
|
|
||||||
]
|
|
||||||
else:
|
|
||||||
raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
|
|
||||||
|
|
||||||
# Combine all libraries
|
|
||||||
libs_to_copy = common_libs + version_specific_libs
|
|
||||||
|
|
||||||
# Copy libraries to unzipped_folder/torch/lib
|
|
||||||
for lib_path in libs_to_copy:
|
for lib_path in libs_to_copy:
|
||||||
copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
|
lib_name = os.path.basename(lib_path)
|
||||||
|
shutil.copy2(lib_path, f"{folder}/tmp/torch/lib/{lib_name}")
|
||||||
|
os.system(
|
||||||
|
f"cd {folder}/tmp/torch/lib/; "
|
||||||
|
f"patchelf --set-rpath '$ORIGIN' --force-rpath {folder}/tmp/torch/lib/{lib_name}"
|
||||||
|
)
|
||||||
|
|
||||||
# Make sure the wheel is tagged with manylinux_2_28
|
# Make sure the wheel is tagged with manylinux_2_28
|
||||||
for f in os.scandir(f"{folder}/tmp/"):
|
for f in os.scandir(f"{folder}/tmp/"):
|
||||||
@ -256,8 +131,14 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None:
|
|||||||
replace_tag(f"{f.path}/WHEEL")
|
replace_tag(f"{f.path}/WHEEL")
|
||||||
break
|
break
|
||||||
|
|
||||||
os.system(f"wheel pack {folder}/tmp/ -d {folder}")
|
os.mkdir(f"{folder}/cuda_wheel")
|
||||||
os.system(f"rm -rf {folder}/tmp/")
|
os.system(f"cd {folder}/tmp/; zip -r {folder}/cuda_wheel/{wheelname} *")
|
||||||
|
shutil.move(
|
||||||
|
f"{folder}/cuda_wheel/{wheelname}",
|
||||||
|
f"{folder}/{wheelname}",
|
||||||
|
copy_function=shutil.copy2,
|
||||||
|
)
|
||||||
|
os.system(f"rm -rf {folder}/tmp/ {folder}/cuda_wheel/")
|
||||||
|
|
||||||
|
|
||||||
def complete_wheel(folder: str) -> str:
|
def complete_wheel(folder: str) -> str:
|
||||||
@ -280,7 +161,14 @@ def complete_wheel(folder: str) -> str:
|
|||||||
f"/{folder}/dist/{repaired_wheel_name}",
|
f"/{folder}/dist/{repaired_wheel_name}",
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
repaired_wheel_name = list_dir(f"/{folder}/dist")[0]
|
repaired_wheel_name = wheel_name.replace(
|
||||||
|
"linux_aarch64", "manylinux_2_28_aarch64"
|
||||||
|
)
|
||||||
|
print(f"Renaming {wheel_name} wheel to {repaired_wheel_name}")
|
||||||
|
os.rename(
|
||||||
|
f"/{folder}/dist/{wheel_name}",
|
||||||
|
f"/{folder}/dist/{repaired_wheel_name}",
|
||||||
|
)
|
||||||
|
|
||||||
print(f"Copying {repaired_wheel_name} to artifacts")
|
print(f"Copying {repaired_wheel_name} to artifacts")
|
||||||
shutil.copy2(
|
shutil.copy2(
|
||||||
@ -320,17 +208,7 @@ if __name__ == "__main__":
|
|||||||
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
|
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
|
||||||
# MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
|
# MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
|
||||||
if enable_cuda:
|
if enable_cuda:
|
||||||
build_vars += "MAX_JOBS=5 "
|
build_vars = "MAX_JOBS=5 " + build_vars
|
||||||
|
|
||||||
# Handle PyPI NVIDIA libraries vs bundled libraries
|
|
||||||
use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
|
|
||||||
if use_nvidia_pypi_libs:
|
|
||||||
print("Configuring build for PyPI NVIDIA libraries")
|
|
||||||
# Configure for dynamic linking (matching x86 logic)
|
|
||||||
build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 "
|
|
||||||
else:
|
|
||||||
print("Configuring build for bundled NVIDIA libraries")
|
|
||||||
# Keep existing static linking approach - already configured above
|
|
||||||
|
|
||||||
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
|
override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
|
||||||
desired_cuda = os.getenv("DESIRED_CUDA")
|
desired_cuda = os.getenv("DESIRED_CUDA")
|
||||||
|
|||||||
@ -438,7 +438,9 @@ def build_torchvision(
|
|||||||
)
|
)
|
||||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||||
elif build_version is not None:
|
elif build_version is not None:
|
||||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
build_vars += (
|
||||||
|
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||||
|
)
|
||||||
if host.using_docker():
|
if host.using_docker():
|
||||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||||
|
|
||||||
@ -493,7 +495,9 @@ def build_torchdata(
|
|||||||
)
|
)
|
||||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||||
elif build_version is not None:
|
elif build_version is not None:
|
||||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
build_vars += (
|
||||||
|
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||||
|
)
|
||||||
if host.using_docker():
|
if host.using_docker():
|
||||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||||
|
|
||||||
@ -549,7 +553,9 @@ def build_torchtext(
|
|||||||
)
|
)
|
||||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||||
elif build_version is not None:
|
elif build_version is not None:
|
||||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
build_vars += (
|
||||||
|
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||||
|
)
|
||||||
if host.using_docker():
|
if host.using_docker():
|
||||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||||
|
|
||||||
@ -607,7 +613,9 @@ def build_torchaudio(
|
|||||||
)
|
)
|
||||||
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
build_vars += f"BUILD_VERSION={version}.dev{build_date}"
|
||||||
elif build_version is not None:
|
elif build_version is not None:
|
||||||
build_vars += f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-', maxsplit=1)[0]}"
|
build_vars += (
|
||||||
|
f"BUILD_VERSION={build_version} PYTORCH_VERSION={branch[1:].split('-')[0]}"
|
||||||
|
)
|
||||||
if host.using_docker():
|
if host.using_docker():
|
||||||
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
build_vars += " CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000"
|
||||||
|
|
||||||
|
|||||||
@ -64,10 +64,6 @@ FROM cuda as cuda12.9
|
|||||||
RUN bash ./install_cuda.sh 12.9
|
RUN bash ./install_cuda.sh 12.9
|
||||||
ENV DESIRED_CUDA=12.9
|
ENV DESIRED_CUDA=12.9
|
||||||
|
|
||||||
FROM cuda as cuda13.0
|
|
||||||
RUN bash ./install_cuda.sh 13.0
|
|
||||||
ENV DESIRED_CUDA=13.0
|
|
||||||
|
|
||||||
FROM ${ROCM_IMAGE} as rocm
|
FROM ${ROCM_IMAGE} as rocm
|
||||||
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
|
||||||
ADD ./common/install_mkl.sh install_mkl.sh
|
ADD ./common/install_mkl.sh install_mkl.sh
|
||||||
@ -80,10 +76,10 @@ ADD ./common/install_mnist.sh install_mnist.sh
|
|||||||
RUN bash ./install_mnist.sh
|
RUN bash ./install_mnist.sh
|
||||||
|
|
||||||
FROM base as all_cuda
|
FROM base as all_cuda
|
||||||
|
COPY --from=cuda11.8 /usr/local/cuda-11.8 /usr/local/cuda-11.8
|
||||||
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6
|
COPY --from=cuda12.6 /usr/local/cuda-12.6 /usr/local/cuda-12.6
|
||||||
COPY --from=cuda12.8 /usr/local/cuda-12.8 /usr/local/cuda-12.8
|
COPY --from=cuda12.8 /usr/local/cuda-12.8 /usr/local/cuda-12.8
|
||||||
COPY --from=cuda12.9 /usr/local/cuda-12.9 /usr/local/cuda-12.9
|
COPY --from=cuda12.9 /usr/local/cuda-12.9 /usr/local/cuda-12.9
|
||||||
COPY --from=cuda13.0 /usr/local/cuda-13.0 /usr/local/cuda-13.0
|
|
||||||
|
|
||||||
# Final step
|
# Final step
|
||||||
FROM ${BASE_TARGET} as final
|
FROM ${BASE_TARGET} as final
|
||||||
|
|||||||
@ -76,13 +76,10 @@ elif [[ "$image" == *cuda*linter* ]]; then
|
|||||||
elif [[ "$image" == *linter* ]]; then
|
elif [[ "$image" == *linter* ]]; then
|
||||||
# Use a separate Dockerfile for linter to keep a small image size
|
# Use a separate Dockerfile for linter to keep a small image size
|
||||||
DOCKERFILE="linter/Dockerfile"
|
DOCKERFILE="linter/Dockerfile"
|
||||||
elif [[ "$image" == *riscv* ]]; then
|
|
||||||
# Use RISC-V specific Dockerfile
|
|
||||||
DOCKERFILE="ubuntu-cross-riscv/Dockerfile"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
_UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
|
_UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb
|
||||||
_UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
|
_UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b
|
||||||
if [[ "$image" == *rocm* ]]; then
|
if [[ "$image" == *rocm* ]]; then
|
||||||
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
|
_UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
|
||||||
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
|
_UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
|
||||||
@ -114,19 +111,31 @@ case "$tag" in
|
|||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
|
||||||
CUDA_VERSION=13.0.0
|
CUDA_VERSION=12.8.1
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
GCC_VERSION=11
|
GCC_VERSION=9
|
||||||
VISION=yes
|
VISION=yes
|
||||||
KATEX=yes
|
KATEX=yes
|
||||||
UCX_COMMIT=${_UCX_COMMIT}
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
|
INDUCTOR_BENCHMARKS=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks)
|
||||||
CUDA_VERSION=12.8.1
|
CUDA_VERSION=12.8.1
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.12
|
||||||
|
GCC_VERSION=9
|
||||||
|
VISION=yes
|
||||||
|
KATEX=yes
|
||||||
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
|
TRITON=yes
|
||||||
|
INDUCTOR_BENCHMARKS=yes
|
||||||
|
;;
|
||||||
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks)
|
||||||
|
CUDA_VERSION=12.8.1
|
||||||
|
ANACONDA_PYTHON_VERSION=3.13
|
||||||
GCC_VERSION=9
|
GCC_VERSION=9
|
||||||
VISION=yes
|
VISION=yes
|
||||||
KATEX=yes
|
KATEX=yes
|
||||||
@ -156,18 +165,18 @@ case "$tag" in
|
|||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-py3-clang12-onnx)
|
pytorch-linux-jammy-py3-clang12-onnx)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
CLANG_VERSION=12
|
CLANG_VERSION=12
|
||||||
VISION=yes
|
VISION=yes
|
||||||
ONNX=yes
|
ONNX=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-py3.10-clang12)
|
pytorch-linux-jammy-py3.9-clang12)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
CLANG_VERSION=12
|
CLANG_VERSION=12
|
||||||
VISION=yes
|
VISION=yes
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
|
pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-noble-rocm-n-py3)
|
||||||
if [[ $tag =~ "jammy" ]]; then
|
if [[ $tag =~ "jammy" ]]; then
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.10
|
||||||
else
|
else
|
||||||
@ -181,9 +190,7 @@ case "$tag" in
|
|||||||
KATEX=yes
|
KATEX=yes
|
||||||
UCX_COMMIT=${_UCX_COMMIT}
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
if [[ $tag =~ "benchmarks" ]]; then
|
|
||||||
INDUCTOR_BENCHMARKS=yes
|
INDUCTOR_BENCHMARKS=yes
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
pytorch-linux-noble-rocm-alpha-py3)
|
pytorch-linux-noble-rocm-alpha-py3)
|
||||||
ANACONDA_PYTHON_VERSION=3.12
|
ANACONDA_PYTHON_VERSION=3.12
|
||||||
@ -195,26 +202,27 @@ case "$tag" in
|
|||||||
KATEX=yes
|
KATEX=yes
|
||||||
UCX_COMMIT=${_UCX_COMMIT}
|
UCX_COMMIT=${_UCX_COMMIT}
|
||||||
UCC_COMMIT=${_UCC_COMMIT}
|
UCC_COMMIT=${_UCC_COMMIT}
|
||||||
|
INDUCTOR_BENCHMARKS=yes
|
||||||
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
|
PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-xpu-n-1-py3)
|
pytorch-linux-jammy-xpu-2025.0-py3)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
|
GCC_VERSION=11
|
||||||
|
VISION=yes
|
||||||
|
XPU_VERSION=2025.0
|
||||||
|
NINJA_VERSION=1.9.0
|
||||||
|
TRITON=yes
|
||||||
|
;;
|
||||||
|
pytorch-linux-jammy-xpu-2025.1-py3)
|
||||||
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
GCC_VERSION=11
|
GCC_VERSION=11
|
||||||
VISION=yes
|
VISION=yes
|
||||||
XPU_VERSION=2025.1
|
XPU_VERSION=2025.1
|
||||||
NINJA_VERSION=1.9.0
|
NINJA_VERSION=1.9.0
|
||||||
TRITON=yes
|
TRITON=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-xpu-n-py3)
|
pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
GCC_VERSION=11
|
|
||||||
VISION=yes
|
|
||||||
XPU_VERSION=2025.2
|
|
||||||
NINJA_VERSION=1.9.0
|
|
||||||
TRITON=yes
|
|
||||||
;;
|
|
||||||
pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
|
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
|
||||||
GCC_VERSION=11
|
GCC_VERSION=11
|
||||||
VISION=yes
|
VISION=yes
|
||||||
KATEX=yes
|
KATEX=yes
|
||||||
@ -222,8 +230,8 @@ case "$tag" in
|
|||||||
DOCS=yes
|
DOCS=yes
|
||||||
INDUCTOR_BENCHMARKS=yes
|
INDUCTOR_BENCHMARKS=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12)
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
CUDA_VERSION=12.8.1
|
CUDA_VERSION=12.8.1
|
||||||
CLANG_VERSION=12
|
CLANG_VERSION=12
|
||||||
VISION=yes
|
VISION=yes
|
||||||
@ -234,8 +242,8 @@ case "$tag" in
|
|||||||
CLANG_VERSION=18
|
CLANG_VERSION=18
|
||||||
VISION=yes
|
VISION=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-py3.10-gcc11)
|
pytorch-linux-jammy-py3.9-gcc11)
|
||||||
ANACONDA_PYTHON_VERSION=3.10
|
ANACONDA_PYTHON_VERSION=3.9
|
||||||
GCC_VERSION=11
|
GCC_VERSION=11
|
||||||
VISION=yes
|
VISION=yes
|
||||||
KATEX=yes
|
KATEX=yes
|
||||||
@ -262,10 +270,13 @@ case "$tag" in
|
|||||||
TRITON_CPU=yes
|
TRITON_CPU=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-linter)
|
pytorch-linux-jammy-linter)
|
||||||
PYTHON_VERSION=3.10
|
# TODO: Use 3.9 here because of this issue https://github.com/python/mypy/issues/13627.
|
||||||
|
# We will need to update mypy version eventually, but that's for another day. The task
|
||||||
|
# would be to upgrade mypy to 1.0.0 with Python 3.11
|
||||||
|
PYTHON_VERSION=3.9
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
|
pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-linter)
|
||||||
PYTHON_VERSION=3.10
|
PYTHON_VERSION=3.9
|
||||||
CUDA_VERSION=12.8.1
|
CUDA_VERSION=12.8.1
|
||||||
;;
|
;;
|
||||||
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
pytorch-linux-jammy-aarch64-py3.10-gcc11)
|
||||||
@ -273,6 +284,7 @@ case "$tag" in
|
|||||||
GCC_VERSION=11
|
GCC_VERSION=11
|
||||||
ACL=yes
|
ACL=yes
|
||||||
VISION=yes
|
VISION=yes
|
||||||
|
CONDA_CMAKE=yes
|
||||||
OPENBLAS=yes
|
OPENBLAS=yes
|
||||||
# snadampal: skipping llvm src build install because the current version
|
# snadampal: skipping llvm src build install because the current version
|
||||||
# from pytorch/llvm:9.0.1 is x86 specific
|
# from pytorch/llvm:9.0.1 is x86 specific
|
||||||
@ -283,15 +295,13 @@ case "$tag" in
|
|||||||
GCC_VERSION=11
|
GCC_VERSION=11
|
||||||
ACL=yes
|
ACL=yes
|
||||||
VISION=yes
|
VISION=yes
|
||||||
|
CONDA_CMAKE=yes
|
||||||
OPENBLAS=yes
|
OPENBLAS=yes
|
||||||
# snadampal: skipping llvm src build install because the current version
|
# snadampal: skipping llvm src build install because the current version
|
||||||
# from pytorch/llvm:9.0.1 is x86 specific
|
# from pytorch/llvm:9.0.1 is x86 specific
|
||||||
SKIP_LLVM_SRC_BUILD_INSTALL=yes
|
SKIP_LLVM_SRC_BUILD_INSTALL=yes
|
||||||
INDUCTOR_BENCHMARKS=yes
|
INDUCTOR_BENCHMARKS=yes
|
||||||
;;
|
;;
|
||||||
pytorch-linux-noble-riscv64-py3.12-gcc14)
|
|
||||||
GCC_VERSION=14
|
|
||||||
;;
|
|
||||||
*)
|
*)
|
||||||
# Catch-all for builds that are not hardcoded.
|
# Catch-all for builds that are not hardcoded.
|
||||||
VISION=yes
|
VISION=yes
|
||||||
@ -412,14 +422,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
if [ -n "$GCC_VERSION" ]; then
|
if [ -n "$GCC_VERSION" ]; then
|
||||||
if [[ "$image" == *riscv* ]]; then
|
if !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
|
||||||
# Check RISC-V cross-compilation toolchain version
|
|
||||||
if !(drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
|
|
||||||
echo "RISC-V GCC_VERSION=$GCC_VERSION, but:"
|
|
||||||
drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
elif !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
|
|
||||||
echo "GCC_VERSION=$GCC_VERSION, but:"
|
echo "GCC_VERSION=$GCC_VERSION, but:"
|
||||||
drun gcc --version
|
drun gcc --version
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@ -1,2 +0,0 @@
|
|||||||
transformers==4.54.0
|
|
||||||
soxr==0.5.0
|
|
||||||
1
.ci/docker/ci_commit_pins/huggingface.txt
Normal file
1
.ci/docker/ci_commit_pins/huggingface.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
243e186efbf7fb93328dd6b34927a4e8c8f24395
|
||||||
@ -1 +0,0 @@
|
|||||||
v2.27.7-1
|
|
||||||
@ -1 +1 @@
|
|||||||
74a23feff57432129df84d8099e622773cf77925
|
e03a63be43e33596f7f0a43b0f530353785e4a59
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
1b0418a9a454b2b93ab8d71f40e59d2297157fae
|
ae324eeac8e102a2b40370e341460f3791353398
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
bbb06c0334a6772b92d24bde54956e675c8c6604
|
f7888497a1eb9e98d4c07537f0d0bcfe180d1363
|
||||||
|
|||||||
@ -66,9 +66,8 @@ function do_cpython_build {
|
|||||||
ln -s pip3 ${prefix}/bin/pip
|
ln -s pip3 ${prefix}/bin/pip
|
||||||
fi
|
fi
|
||||||
# install setuptools since python 3.12 is required to use distutils
|
# install setuptools since python 3.12 is required to use distutils
|
||||||
# packaging is needed to create symlink since wheel no longer provides needed information
|
${prefix}/bin/pip install wheel==0.45.1 setuptools==80.9.0
|
||||||
${prefix}/bin/pip install packaging==25.0 wheel==0.45.1 setuptools==80.9.0
|
local abi_tag=$(${prefix}/bin/python -c "from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag; print('{0}{1}-{2}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag()))")
|
||||||
local abi_tag=$(${prefix}/bin/python -c "from packaging.tags import interpreter_name, interpreter_version; import sysconfig ; from sysconfig import get_config_var; print('{0}{1}-{0}{1}{2}'.format(interpreter_name(), interpreter_version(), 't' if sysconfig.get_config_var('Py_GIL_DISABLED') else ''))")
|
|
||||||
ln -sf ${prefix} /opt/python/${abi_tag}
|
ln -sf ${prefix} /opt/python/${abi_tag}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -83,9 +82,9 @@ function build_cpython {
|
|||||||
py_suffix=${py_ver::-1}
|
py_suffix=${py_ver::-1}
|
||||||
py_folder=$py_suffix
|
py_folder=$py_suffix
|
||||||
fi
|
fi
|
||||||
# Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4
|
# Only b3 is available now
|
||||||
if [ "$py_suffix" == "3.14.0" ]; then
|
if [ "$py_suffix" == "3.14.0" ]; then
|
||||||
py_suffix="3.14.0rc2"
|
py_suffix="3.14.0b3"
|
||||||
fi
|
fi
|
||||||
wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
|
wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
|
||||||
do_cpython_build $py_ver Python-$py_suffix
|
do_cpython_build $py_ver Python-$py_suffix
|
||||||
|
|||||||
@ -10,7 +10,7 @@ else
|
|||||||
arch_path='sbsa'
|
arch_path='sbsa'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
NVSHMEM_VERSION=3.3.24
|
NVSHMEM_VERSION=3.3.9
|
||||||
|
|
||||||
function install_cuda {
|
function install_cuda {
|
||||||
version=$1
|
version=$1
|
||||||
@ -62,16 +62,14 @@ function install_nvshmem {
|
|||||||
mkdir -p "${tmpdir}" && cd "${tmpdir}"
|
mkdir -p "${tmpdir}" && cd "${tmpdir}"
|
||||||
|
|
||||||
# nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
|
# nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
|
||||||
# This pattern is a lie as it is not consistent across versions, for 3.3.9 it was cuda_ver-arch-nvshhem-ver
|
filename="libnvshmem_cuda${cuda_major_version}-linux-${arch_path}-${nvshmem_version}"
|
||||||
filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive"
|
url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}.tar.gz"
|
||||||
suffix=".tar.xz"
|
|
||||||
url="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/linux-${arch_path}/${filename}${suffix}"
|
|
||||||
|
|
||||||
# download, unpack, install
|
# download, unpack, install
|
||||||
wget -q "${url}"
|
wget -q "${url}"
|
||||||
tar xf "${filename}${suffix}"
|
tar xf "${filename}.tar.gz"
|
||||||
cp -a "${filename}/include/"* /usr/local/cuda/include/
|
cp -a "libnvshmem/include/"* /usr/local/cuda/include/
|
||||||
cp -a "${filename}/lib/"* /usr/local/cuda/lib64/
|
cp -a "libnvshmem/lib/"* /usr/local/cuda/lib64/
|
||||||
|
|
||||||
# cleanup
|
# cleanup
|
||||||
cd ..
|
cd ..
|
||||||
@ -128,6 +126,74 @@ function install_129 {
|
|||||||
ldconfig
|
ldconfig
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function prune_124 {
|
||||||
|
echo "Pruning CUDA 12.4"
|
||||||
|
#####################################################################################
|
||||||
|
# CUDA 12.4 prune static libs
|
||||||
|
#####################################################################################
|
||||||
|
export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune"
|
||||||
|
export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64"
|
||||||
|
|
||||||
|
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||||
|
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||||
|
|
||||||
|
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||||
|
export GENCODE=$OVERRIDE_GENCODE
|
||||||
|
fi
|
||||||
|
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
|
||||||
|
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
|
||||||
|
fi
|
||||||
|
|
||||||
|
# all CUDA libs except CuDNN and CuBLAS
|
||||||
|
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||||
|
| xargs -I {} bash -c \
|
||||||
|
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||||
|
|
||||||
|
# prune CuDNN and CuBLAS
|
||||||
|
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||||
|
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||||
|
|
||||||
|
#####################################################################################
|
||||||
|
# CUDA 12.4 prune visual tools
|
||||||
|
#####################################################################################
|
||||||
|
export CUDA_BASE="/usr/local/cuda-12.4/"
|
||||||
|
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/
|
||||||
|
}
|
||||||
|
|
||||||
|
function prune_126 {
|
||||||
|
echo "Pruning CUDA 12.6"
|
||||||
|
#####################################################################################
|
||||||
|
# CUDA 12.6 prune static libs
|
||||||
|
#####################################################################################
|
||||||
|
export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune"
|
||||||
|
export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64"
|
||||||
|
|
||||||
|
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||||
|
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
|
||||||
|
|
||||||
|
if [[ -n "$OVERRIDE_GENCODE" ]]; then
|
||||||
|
export GENCODE=$OVERRIDE_GENCODE
|
||||||
|
fi
|
||||||
|
if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then
|
||||||
|
export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN
|
||||||
|
fi
|
||||||
|
|
||||||
|
# all CUDA libs except CuDNN and CuBLAS
|
||||||
|
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
|
||||||
|
| xargs -I {} bash -c \
|
||||||
|
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
|
||||||
|
|
||||||
|
# prune CuDNN and CuBLAS
|
||||||
|
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
|
||||||
|
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
|
||||||
|
|
||||||
|
#####################################################################################
|
||||||
|
# CUDA 12.6 prune visual tools
|
||||||
|
#####################################################################################
|
||||||
|
export CUDA_BASE="/usr/local/cuda-12.6/"
|
||||||
|
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/
|
||||||
|
}
|
||||||
|
|
||||||
function install_128 {
|
function install_128 {
|
||||||
CUDNN_VERSION=9.8.0.87
|
CUDNN_VERSION=9.8.0.87
|
||||||
echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
|
echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
|
||||||
@ -146,38 +212,18 @@ function install_128 {
|
|||||||
ldconfig
|
ldconfig
|
||||||
}
|
}
|
||||||
|
|
||||||
function install_130 {
|
|
||||||
CUDNN_VERSION=9.13.0.50
|
|
||||||
echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
|
|
||||||
# install CUDA 13.0 in the same container
|
|
||||||
install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
|
|
||||||
|
|
||||||
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
|
|
||||||
install_cudnn 13 $CUDNN_VERSION
|
|
||||||
|
|
||||||
install_nvshmem 13 $NVSHMEM_VERSION
|
|
||||||
|
|
||||||
CUDA_VERSION=13.0 bash install_nccl.sh
|
|
||||||
|
|
||||||
CUDA_VERSION=13.0 bash install_cusparselt.sh
|
|
||||||
|
|
||||||
ldconfig
|
|
||||||
}
|
|
||||||
|
|
||||||
# idiomatic parameter and option handling in sh
|
# idiomatic parameter and option handling in sh
|
||||||
while test $# -gt 0
|
while test $# -gt 0
|
||||||
do
|
do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
12.4) install_124;
|
12.4) install_124; prune_124
|
||||||
;;
|
;;
|
||||||
12.6|12.6.*) install_126;
|
12.6|12.6.*) install_126; prune_126
|
||||||
;;
|
;;
|
||||||
12.8|12.8.*) install_128;
|
12.8|12.8.*) install_128;
|
||||||
;;
|
;;
|
||||||
12.9|12.9.*) install_129;
|
12.9|12.9.*) install_129;
|
||||||
;;
|
;;
|
||||||
13.0|13.0.*) install_130;
|
|
||||||
;;
|
|
||||||
*) echo "bad argument $1"; exit 1
|
*) echo "bad argument $1"; exit 1
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|||||||
@ -5,15 +5,7 @@ set -ex
|
|||||||
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
|
||||||
mkdir tmp_cusparselt && cd tmp_cusparselt
|
mkdir tmp_cusparselt && cd tmp_cusparselt
|
||||||
|
|
||||||
if [[ ${CUDA_VERSION:0:4} =~ "13" ]]; then
|
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
|
||||||
arch_path='sbsa'
|
|
||||||
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
|
||||||
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
|
||||||
arch_path='x86_64'
|
|
||||||
fi
|
|
||||||
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.8.0.4_cuda13-archive"
|
|
||||||
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
|
|
||||||
elif [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
|
|
||||||
arch_path='sbsa'
|
arch_path='sbsa'
|
||||||
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
export TARGETARCH=${TARGETARCH:-$(uname -m)}
|
||||||
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
|
||||||
|
|||||||
@ -5,7 +5,9 @@ set -ex
|
|||||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||||
|
|
||||||
function install_huggingface() {
|
function install_huggingface() {
|
||||||
pip_install -r huggingface-requirements.txt
|
local version
|
||||||
|
commit=$(get_pinned_commit huggingface)
|
||||||
|
pip_install "git+https://github.com/huggingface/transformers@${commit}"
|
||||||
}
|
}
|
||||||
|
|
||||||
function install_timm() {
|
function install_timm() {
|
||||||
@ -24,12 +26,15 @@ function install_torchbench() {
|
|||||||
|
|
||||||
python install.py --continue_on_fail
|
python install.py --continue_on_fail
|
||||||
|
|
||||||
|
# TODO (huydhn): transformers-4.44.2 added by https://github.com/pytorch/benchmark/pull/2488
|
||||||
|
# is regressing speedup metric. This needs to be investigated further
|
||||||
|
pip install transformers==4.38.1
|
||||||
|
|
||||||
echo "Print all dependencies after TorchBench is installed"
|
echo "Print all dependencies after TorchBench is installed"
|
||||||
python -mpip freeze
|
python -mpip freeze
|
||||||
popd
|
popd
|
||||||
|
|
||||||
chown -R jenkins torchbench
|
chown -R jenkins torchbench
|
||||||
chown -R jenkins /opt/conda
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Pango is needed for weasyprint which is needed for doctr
|
# Pango is needed for weasyprint which is needed for doctr
|
||||||
@ -43,4 +48,4 @@ install_huggingface
|
|||||||
install_timm
|
install_timm
|
||||||
|
|
||||||
# Clean up
|
# Clean up
|
||||||
conda_run pip uninstall -y torch torchvision torchaudio triton torchao
|
conda_run pip uninstall -y torch torchvision torchaudio triton
|
||||||
|
|||||||
@ -7,8 +7,6 @@ if [[ ${CUDA_VERSION:0:2} == "11" ]]; then
|
|||||||
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
|
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
|
||||||
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
|
elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then
|
||||||
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
|
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
|
||||||
elif [[ ${CUDA_VERSION:0:2} == "13" ]]; then
|
|
||||||
NCCL_VERSION=$(cat ci_commit_pins/nccl-cu13.txt)
|
|
||||||
else
|
else
|
||||||
echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
|
echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
|
||||||
exit 1
|
exit 1
|
||||||
|
|||||||
@ -19,8 +19,8 @@ pip_install \
|
|||||||
transformers==4.36.2
|
transformers==4.36.2
|
||||||
|
|
||||||
pip_install coloredlogs packaging
|
pip_install coloredlogs packaging
|
||||||
pip_install onnxruntime==1.22.1
|
pip_install onnxruntime==1.18.1
|
||||||
pip_install onnxscript==0.4.0
|
pip_install onnxscript==0.3.1
|
||||||
|
|
||||||
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
|
||||||
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
|
||||||
|
|||||||
@ -57,7 +57,7 @@ if [ ! -f setup.py ]; then
|
|||||||
cd python
|
cd python
|
||||||
fi
|
fi
|
||||||
|
|
||||||
pip_install pybind11==3.0.1
|
pip_install pybind11==2.13.6
|
||||||
|
|
||||||
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
|
# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
|
||||||
as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
|
as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
|
||||||
|
|||||||
@ -44,12 +44,8 @@ function install_ucc() {
|
|||||||
|
|
||||||
./autogen.sh
|
./autogen.sh
|
||||||
|
|
||||||
if [[ -n "$CUDA_VERSION" && $CUDA_VERSION == 13* ]]; then
|
|
||||||
NVCC_GENCODE="-gencode=arch=compute_86,code=compute_86"
|
|
||||||
else
|
|
||||||
# We only run distributed tests on Tesla M60 and A10G
|
# We only run distributed tests on Tesla M60 and A10G
|
||||||
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
|
NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ -n "$ROCM_VERSION" ]]; then
|
if [[ -n "$ROCM_VERSION" ]]; then
|
||||||
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
|
||||||
|
|||||||
@ -34,8 +34,6 @@ function install_ubuntu() {
|
|||||||
|
|
||||||
# The xpu-smi packages
|
# The xpu-smi packages
|
||||||
apt-get install -y flex bison xpu-smi
|
apt-get install -y flex bison xpu-smi
|
||||||
|
|
||||||
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
|
||||||
# Compute and Media Runtimes
|
# Compute and Media Runtimes
|
||||||
apt-get install -y \
|
apt-get install -y \
|
||||||
intel-opencl-icd intel-level-zero-gpu level-zero \
|
intel-opencl-icd intel-level-zero-gpu level-zero \
|
||||||
@ -43,18 +41,11 @@ function install_ubuntu() {
|
|||||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
||||||
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
||||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
|
||||||
|
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||||
|
apt-get install -y intel-ocloc
|
||||||
|
fi
|
||||||
# Development Packages
|
# Development Packages
|
||||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
|
||||||
else # rolling driver
|
|
||||||
apt-get install -y \
|
|
||||||
intel-opencl-icd libze-intel-gpu1 libze1 \
|
|
||||||
intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
|
|
||||||
libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
|
|
||||||
libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
|
|
||||||
mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
|
|
||||||
apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
|
|
||||||
fi
|
|
||||||
|
|
||||||
# Install Intel Support Packages
|
# Install Intel Support Packages
|
||||||
apt-get install -y ${XPU_PACKAGES}
|
apt-get install -y ${XPU_PACKAGES}
|
||||||
|
|
||||||
@ -65,15 +56,11 @@ function install_ubuntu() {
|
|||||||
|
|
||||||
function install_rhel() {
|
function install_rhel() {
|
||||||
. /etc/os-release
|
. /etc/os-release
|
||||||
if [[ "${ID}" == "rhel" ]]; then
|
|
||||||
if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
if [[ ! " 8.8 8.10 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
|
||||||
echo "RHEL version ${VERSION_ID} not supported"
|
echo "RHEL version ${VERSION_ID} not supported"
|
||||||
exit
|
exit
|
||||||
fi
|
fi
|
||||||
elif [[ "${ID}" == "almalinux" ]]; then
|
|
||||||
# Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64
|
|
||||||
VERSION_ID="8.8"
|
|
||||||
fi
|
|
||||||
|
|
||||||
dnf install -y 'dnf-command(config-manager)'
|
dnf install -y 'dnf-command(config-manager)'
|
||||||
# To add the online network package repository for the GPU Driver
|
# To add the online network package repository for the GPU Driver
|
||||||
@ -143,18 +130,18 @@ function install_sles() {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# Default use GPU driver rolling releases
|
# Default use GPU driver LTS releases
|
||||||
XPU_DRIVER_VERSION=""
|
XPU_DRIVER_VERSION="/lts/2350"
|
||||||
if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
|
if [[ "${XPU_DRIVER_TYPE,,}" == "rolling" ]]; then
|
||||||
# Use GPU driver LTS releases
|
# Use GPU driver rolling releases
|
||||||
XPU_DRIVER_VERSION="/lts/2350"
|
XPU_DRIVER_VERSION=""
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Default use Intel® oneAPI Deep Learning Essentials 2025.1
|
# Default use Intel® oneAPI Deep Learning Essentials 2025.0
|
||||||
if [[ "$XPU_VERSION" == "2025.2" ]]; then
|
if [[ "$XPU_VERSION" == "2025.1" ]]; then
|
||||||
XPU_PACKAGES="intel-deep-learning-essentials-2025.2"
|
|
||||||
else
|
|
||||||
XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
|
XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
|
||||||
|
else
|
||||||
|
XPU_PACKAGES="intel-deep-learning-essentials-2025.0"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# The installation depends on the base OS
|
# The installation depends on the base OS
|
||||||
|
|||||||
@ -1,9 +0,0 @@
|
|||||||
#!/bin/bash
|
|
||||||
set -xe
|
|
||||||
# Script used in Linux x86 and aarch64 CD pipeline
|
|
||||||
|
|
||||||
# Workaround for exposing statically linked libstdc++ CXX11 ABI symbols.
|
|
||||||
# see: https://github.com/pytorch/pytorch/issues/133437
|
|
||||||
LIBNONSHARED=$(gcc -print-file-name=libstdc++_nonshared.a)
|
|
||||||
nm -g $LIBNONSHARED | grep " T " | grep recursive_directory_iterator | cut -c 20- > weaken-symbols.txt
|
|
||||||
objcopy --weaken-symbols weaken-symbols.txt $LIBNONSHARED $LIBNONSHARED
|
|
||||||
@ -69,19 +69,6 @@ RUN bash ./install_cuda.sh 12.9
|
|||||||
RUN bash ./install_magma.sh 12.9
|
RUN bash ./install_magma.sh 12.9
|
||||||
RUN ln -sf /usr/local/cuda-12.9 /usr/local/cuda
|
RUN ln -sf /usr/local/cuda-12.9 /usr/local/cuda
|
||||||
|
|
||||||
FROM cuda as cuda13.0
|
|
||||||
RUN bash ./install_cuda.sh 13.0
|
|
||||||
RUN bash ./install_magma.sh 13.0
|
|
||||||
RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda
|
|
||||||
|
|
||||||
# Install libibverbs for libtorch and copy to CUDA directory
|
|
||||||
RUN apt-get update -y && \
|
|
||||||
apt-get install -y libibverbs-dev librdmacm-dev && \
|
|
||||||
cp /usr/lib/x86_64-linux-gnu/libmlx5.so* /usr/local/cuda/lib64/ && \
|
|
||||||
cp /usr/lib/x86_64-linux-gnu/librdmacm.so* /usr/local/cuda/lib64/ && \
|
|
||||||
cp /usr/lib/x86_64-linux-gnu/libibverbs.so* /usr/local/cuda/lib64/ && \
|
|
||||||
cp /usr/lib/x86_64-linux-gnu/libnl* /usr/local/cuda/lib64/
|
|
||||||
|
|
||||||
FROM cpu as rocm
|
FROM cpu as rocm
|
||||||
ARG ROCM_VERSION
|
ARG ROCM_VERSION
|
||||||
ARG PYTORCH_ROCM_ARCH
|
ARG PYTORCH_ROCM_ARCH
|
||||||
|
|||||||
@ -130,8 +130,7 @@ ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/op
|
|||||||
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
|
RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
|
||||||
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
|
/opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
|
||||||
done;
|
done;
|
||||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
|
||||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
|
||||||
|
|
||||||
# cmake-3.18.4 from pip; force in case cmake3 already exists
|
# cmake-3.18.4 from pip; force in case cmake3 already exists
|
||||||
RUN yum install -y python3-pip && \
|
RUN yum install -y python3-pip && \
|
||||||
@ -176,6 +175,6 @@ ENV XPU_DRIVER_TYPE ROLLING
|
|||||||
RUN python3 -m pip install --upgrade pip && \
|
RUN python3 -m pip install --upgrade pip && \
|
||||||
python3 -mpip install cmake==3.28.4
|
python3 -mpip install cmake==3.28.4
|
||||||
ADD ./common/install_xpu.sh install_xpu.sh
|
ADD ./common/install_xpu.sh install_xpu.sh
|
||||||
ENV XPU_VERSION 2025.2
|
ENV XPU_VERSION 2025.1
|
||||||
RUN bash ./install_xpu.sh && rm install_xpu.sh
|
RUN bash ./install_xpu.sh && rm install_xpu.sh
|
||||||
RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd
|
RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd
|
||||||
|
|||||||
@ -71,5 +71,3 @@ RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
|
|||||||
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
|
RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
|
||||||
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
|
COPY --from=openblas /opt/OpenBLAS/ /opt/OpenBLAS/
|
||||||
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH
|
ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH
|
||||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
|
||||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
|
||||||
|
|||||||
@ -95,5 +95,3 @@ COPY --from=nvpl /opt/nvpl/lib/ /usr/local/lib/
|
|||||||
COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/
|
COPY --from=nvpl /opt/nvpl/include/ /usr/local/include/
|
||||||
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
|
RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
|
||||||
ENV PATH=/usr/local/cuda/bin:$PATH
|
ENV PATH=/usr/local/cuda/bin:$PATH
|
||||||
ADD ./common/patch_libstdc.sh patch_libstdc.sh
|
|
||||||
RUN bash ./patch_libstdc.sh && rm patch_libstdc.sh
|
|
||||||
|
|||||||
@ -67,12 +67,6 @@ case ${image} in
|
|||||||
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
|
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
|
||||||
MANY_LINUX_VERSION="2_28"
|
MANY_LINUX_VERSION="2_28"
|
||||||
;;
|
;;
|
||||||
manylinux2_28-builder:cuda13*)
|
|
||||||
TARGET=cuda_final
|
|
||||||
GPU_IMAGE=amd64/almalinux:8
|
|
||||||
DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
|
|
||||||
MANY_LINUX_VERSION="2_28"
|
|
||||||
;;
|
|
||||||
manylinuxaarch64-builder:cuda*)
|
manylinuxaarch64-builder:cuda*)
|
||||||
TARGET=cuda_final
|
TARGET=cuda_final
|
||||||
GPU_IMAGE=amd64/almalinux:8
|
GPU_IMAGE=amd64/almalinux:8
|
||||||
|
|||||||
@ -63,12 +63,11 @@ lark==0.12.0
|
|||||||
#Pinned versions: 0.12.0
|
#Pinned versions: 0.12.0
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
librosa>=0.6.2 ; python_version < "3.11" and platform_machine != "s390x"
|
librosa>=0.6.2 ; python_version < "3.11"
|
||||||
librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
|
librosa==0.10.2 ; python_version == "3.12"
|
||||||
#Description: A python package for music and audio analysis
|
#Description: A python package for music and audio analysis
|
||||||
#Pinned versions: >=0.6.2
|
#Pinned versions: >=0.6.2
|
||||||
#test that import: test_spectral_ops.py
|
#test that import: test_spectral_ops.py
|
||||||
#librosa depends on numba; disable it for s390x while numba is disabled too
|
|
||||||
|
|
||||||
#mkl #this breaks linux-bionic-rocm4.5-py3.7
|
#mkl #this breaks linux-bionic-rocm4.5-py3.7
|
||||||
#Description: Intel oneAPI Math Kernel Library
|
#Description: Intel oneAPI Math Kernel Library
|
||||||
@ -93,9 +92,8 @@ librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
|
|||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
mypy==1.16.0 ; platform_system != "Windows"
|
mypy==1.16.0
|
||||||
# Pin MyPy version because new errors are likely to appear with each release
|
# Pin MyPy version because new errors are likely to appear with each release
|
||||||
# Skip on Windows as lots of type annotations are POSIX specific
|
|
||||||
#Description: linter
|
#Description: linter
|
||||||
#Pinned versions: 1.16.0
|
#Pinned versions: 1.16.0
|
||||||
#test that import: test_typing.py, test_type_hints.py
|
#test that import: test_typing.py, test_type_hints.py
|
||||||
@ -112,15 +110,14 @@ ninja==1.11.1.3
|
|||||||
#Pinned versions: 1.11.1.3
|
#Pinned versions: 1.11.1.3
|
||||||
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
|
||||||
|
|
||||||
numba==0.49.0 ; python_version < "3.9" and platform_machine != "s390x"
|
numba==0.49.0 ; python_version < "3.9"
|
||||||
numba==0.55.2 ; python_version == "3.9" and platform_machine != "s390x"
|
numba==0.55.2 ; python_version == "3.9"
|
||||||
numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
|
numba==0.55.2 ; python_version == "3.10"
|
||||||
numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
|
numba==0.60.0 ; python_version == "3.12"
|
||||||
#Description: Just-In-Time Compiler for Numerical Functions
|
#Description: Just-In-Time Compiler for Numerical Functions
|
||||||
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
#Pinned versions: 0.54.1, 0.49.0, <=0.49.1
|
||||||
#test that import: test_numba_integration.py
|
#test that import: test_numba_integration.py
|
||||||
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
#For numba issue see https://github.com/pytorch/pytorch/issues/51511
|
||||||
#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
|
|
||||||
|
|
||||||
#numpy
|
#numpy
|
||||||
#Description: Provides N-dimensional arrays and linear algebra
|
#Description: Provides N-dimensional arrays and linear algebra
|
||||||
@ -264,6 +261,11 @@ scipy==1.14.1 ; python_version >= "3.12"
|
|||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
|
tb-nightly==2.13.0a20230426
|
||||||
|
#Description: TensorBoard
|
||||||
|
#Pinned versions:
|
||||||
|
#test that import:
|
||||||
|
|
||||||
# needed by torchgen utils
|
# needed by torchgen utils
|
||||||
typing-extensions>=4.10.0
|
typing-extensions>=4.10.0
|
||||||
#Description: type hints for python
|
#Description: type hints for python
|
||||||
@ -305,7 +307,7 @@ pytest-cpp==2.3.0
|
|||||||
#Pinned versions: 2.3.0
|
#Pinned versions: 2.3.0
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
z3-solver==4.15.1.0 ; platform_machine != "s390x"
|
z3-solver==4.15.1.0
|
||||||
#Description: The Z3 Theorem Prover Project
|
#Description: The Z3 Theorem Prover Project
|
||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
@ -340,7 +342,7 @@ onnx==1.18.0
|
|||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
|
|
||||||
onnxscript==0.4.0
|
onnxscript==0.3.1
|
||||||
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
|
||||||
#Pinned versions:
|
#Pinned versions:
|
||||||
#test that import:
|
#test that import:
|
||||||
@ -380,7 +382,7 @@ dataclasses_json==0.6.7
|
|||||||
cmake==4.0.0
|
cmake==4.0.0
|
||||||
#Description: required for building
|
#Description: required for building
|
||||||
|
|
||||||
tlparse==0.4.0
|
tlparse==0.3.30
|
||||||
#Description: required for log parsing
|
#Description: required for log parsing
|
||||||
|
|
||||||
cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
|
cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
sphinx==5.3.0
|
sphinx==5.3.0
|
||||||
#Description: This is used to generate PyTorch docs
|
#Description: This is used to generate PyTorch docs
|
||||||
#Pinned versions: 5.3.0
|
#Pinned versions: 5.3.0
|
||||||
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@71e55749be14ceb56e7f8211a9fb649866b87ad4#egg=pytorch_sphinx_theme2
|
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2
|
||||||
|
|
||||||
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering
|
||||||
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
# but it doesn't seem to work and hangs around idly. The initial thought that it is probably
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
3.5.0
|
3.4.0
|
||||||
|
|||||||
@ -1 +1 @@
|
|||||||
3.5.0
|
3.4.0
|
||||||
|
|||||||
@ -1,155 +0,0 @@
|
|||||||
# Cross-compilation Docker container for RISC-V architecture
|
|
||||||
ARG UBUNTU_VERSION
|
|
||||||
FROM --platform=linux/amd64 ubuntu:${UBUNTU_VERSION} as base
|
|
||||||
|
|
||||||
ARG UBUNTU_VERSION
|
|
||||||
|
|
||||||
ENV GCC_VERSION=14
|
|
||||||
ENV PYTHON_VERSION=3.12.3
|
|
||||||
ENV DEBIAN_FRONTEND=noninteractive
|
|
||||||
ENV CC=riscv64-linux-gnu-gcc-${GCC_VERSION}
|
|
||||||
ENV CXX=riscv64-linux-gnu-g++-${GCC_VERSION}
|
|
||||||
ENV QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/
|
|
||||||
ENV SYSROOT=/opt/sysroot
|
|
||||||
|
|
||||||
# Install basic dependencies
|
|
||||||
RUN apt-get update && apt-get install -y \
|
|
||||||
ninja-build \
|
|
||||||
autoconf \
|
|
||||||
automake \
|
|
||||||
libtool \
|
|
||||||
patchelf \
|
|
||||||
ccache \
|
|
||||||
git \
|
|
||||||
wget \
|
|
||||||
python3-pip \
|
|
||||||
python3-venv \
|
|
||||||
python-is-python3 \
|
|
||||||
cmake \
|
|
||||||
sudo \
|
|
||||||
lsb-release \
|
|
||||||
gcc-${GCC_VERSION}-riscv64-linux-gnu \
|
|
||||||
g++-${GCC_VERSION}-riscv64-linux-gnu \
|
|
||||||
pkg-config \
|
|
||||||
&& rm -rf /var/lib/apt/lists/*
|
|
||||||
|
|
||||||
# Install user
|
|
||||||
COPY ./common/install_user.sh install_user.sh
|
|
||||||
RUN bash ./install_user.sh && rm install_user.sh
|
|
||||||
|
|
||||||
FROM base as python
|
|
||||||
ARG ZLIB_VERSION=1.3.1
|
|
||||||
ARG FFI_VERSION=3.4.6
|
|
||||||
ARG BZ2_VERSION=1.0.8
|
|
||||||
ARG XZ_VERSION=5.4.6
|
|
||||||
ARG OPENSSL_VERSION=3.2.1
|
|
||||||
|
|
||||||
# Set up sysroot directory for dependencies
|
|
||||||
ENV PKG_CONFIG_PATH=${SYSROOT}/lib/pkgconfig
|
|
||||||
ENV PKG_CONFIG_SYSROOT_DIR=${SYSROOT}
|
|
||||||
|
|
||||||
WORKDIR /opt
|
|
||||||
|
|
||||||
# Build zlib (for compression)
|
|
||||||
RUN echo "--- Building zlib ---" \
|
|
||||||
&& wget -c https://www.zlib.net/zlib-${ZLIB_VERSION}.tar.gz \
|
|
||||||
&& tar -xf zlib-${ZLIB_VERSION}.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd zlib-${ZLIB_VERSION}/ \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../configure --prefix=${SYSROOT} \
|
|
||||||
&& make -j$(nproc) && make install \
|
|
||||||
&& cd ../..
|
|
||||||
|
|
||||||
# Build libffi (for ctypes module)
|
|
||||||
RUN echo "--- Building libffi ---" \
|
|
||||||
&& wget -c https://github.com/libffi/libffi/releases/download/v${FFI_VERSION}/libffi-${FFI_VERSION}.tar.gz \
|
|
||||||
&& tar -xf libffi-${FFI_VERSION}.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd libffi-${FFI_VERSION}/ \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
|
|
||||||
&& make -j$(nproc) && make install \
|
|
||||||
&& cd ../..
|
|
||||||
|
|
||||||
# Build bzip2 (for bz2 module)
|
|
||||||
RUN echo "--- Building bzip2 ---" \
|
|
||||||
&& wget -c https://sourceware.org/pub/bzip2/bzip2-${BZ2_VERSION}.tar.gz \
|
|
||||||
&& tar -xf bzip2-${BZ2_VERSION}.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd bzip2-${BZ2_VERSION}/ \
|
|
||||||
&& make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} bzip2 bzip2recover libbz2.a \
|
|
||||||
&& make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} -f Makefile-libbz2_so \
|
|
||||||
&& make install PREFIX=${SYSROOT} \
|
|
||||||
&& cp libbz2.so.${BZ2_VERSION} ${SYSROOT}/lib/ \
|
|
||||||
&& cd ${SYSROOT}/lib/ \
|
|
||||||
&& ln -sf libbz2.so.${BZ2_VERSION} libbz2.so.1.0 \
|
|
||||||
&& ln -sf libbz2.so.1.0 libbz2.so \
|
|
||||||
&& cd /opt/
|
|
||||||
|
|
||||||
# Build xz (for lzma module)
|
|
||||||
RUN echo "--- Building xz ---" \
|
|
||||||
&& wget -c https://github.com/tukaani-project/xz/releases/download/v${XZ_VERSION}/xz-${XZ_VERSION}.tar.gz \
|
|
||||||
&& tar -xf xz-${XZ_VERSION}.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd xz-${XZ_VERSION} \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
|
|
||||||
&& make -j$(nproc) && make install \
|
|
||||||
&& cd ../..
|
|
||||||
|
|
||||||
# Build OpenSSL (for ssl module)
|
|
||||||
RUN echo "--- Building OpenSSL ---" \
|
|
||||||
&& wget -c https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz \
|
|
||||||
&& tar -xf openssl-${OPENSSL_VERSION}.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd openssl-${OPENSSL_VERSION}/ \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../Configure linux64-riscv64 --prefix=${SYSROOT} \
|
|
||||||
&& make -j$(nproc) && make install_sw \
|
|
||||||
&& cd ../..
|
|
||||||
|
|
||||||
# Build SQLite3 (for sqlite3 module)
|
|
||||||
RUN echo "--- Building SQLite3 ---" \
|
|
||||||
&& wget -c https://www.sqlite.org/2024/sqlite-autoconf-3450200.tar.gz \
|
|
||||||
&& tar -xf sqlite-autoconf-3450200.tar.gz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd sqlite-autoconf-3450200 \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
|
|
||||||
&& make -j$(nproc) && make install \
|
|
||||||
&& cd ../..
|
|
||||||
|
|
||||||
# Build and install RISC-V Python with all modules
|
|
||||||
RUN wget -c https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
|
|
||||||
&& tar -xf Python-${PYTHON_VERSION}.tgz --no-same-permissions --no-same-owner \
|
|
||||||
&& cd Python-${PYTHON_VERSION} \
|
|
||||||
&& mkdir build && cd build \
|
|
||||||
&& ../configure \
|
|
||||||
--host=riscv64-linux-gnu \
|
|
||||||
--build=x86_64-linux-gnu \
|
|
||||||
--prefix=${SYSROOT} \
|
|
||||||
--enable-shared \
|
|
||||||
--disable-ipv6 \
|
|
||||||
--with-build-python=/usr/bin/python3 \
|
|
||||||
--with-ensurepip=no \
|
|
||||||
ac_cv_file__dev_ptmx=yes \
|
|
||||||
ac_cv_file__dev_ptc=no \
|
|
||||||
&& make -j$(nproc) \
|
|
||||||
&& make install
|
|
||||||
|
|
||||||
FROM base as final
|
|
||||||
COPY --from=python /opt/sysroot /opt/sysroot
|
|
||||||
|
|
||||||
# Install crossenv and cmake
|
|
||||||
RUN pip install crossenv cmake==4.0.0 --break-system-packages \
|
|
||||||
&& /usr/bin/python3 -m crossenv ${SYSROOT}/bin/python3 /opt/riscv-cross-env
|
|
||||||
|
|
||||||
# Add pip-installed cmake binaries to PATH
|
|
||||||
ENV PATH="/usr/local/bin:${PATH}"
|
|
||||||
|
|
||||||
# Set up cross Python environment
|
|
||||||
SHELL ["/bin/bash", "-c"]
|
|
||||||
RUN source /opt/riscv-cross-env/bin/activate \
|
|
||||||
&& pip install setuptools pyyaml typing_extensions wheel
|
|
||||||
|
|
||||||
# Set default environment variables for PyTorch build
|
|
||||||
ENV Python_ROOT_DIR=${SYSROOT}
|
|
||||||
ENV OPENSSL_ROOT_DIR=${SYSROOT}
|
|
||||||
|
|
||||||
USER jenkins
|
|
||||||
CMD ["bash"]
|
|
||||||
@ -96,11 +96,11 @@ ARG ANACONDA_PYTHON_VERSION
|
|||||||
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
|
||||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||||
COPY ci_commit_pins/timm.txt timm.txt
|
COPY ci_commit_pins/timm.txt timm.txt
|
||||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
||||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
|
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
|
||||||
|
|
||||||
# (optional) Install non-default Ninja version
|
# (optional) Install non-default Ninja version
|
||||||
ARG NINJA_VERSION
|
ARG NINJA_VERSION
|
||||||
|
|||||||
@ -56,10 +56,10 @@ RUN rm install_openssl.sh
|
|||||||
ARG INDUCTOR_BENCHMARKS
|
ARG INDUCTOR_BENCHMARKS
|
||||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||||
COPY ci_commit_pins/timm.txt timm.txt
|
COPY ci_commit_pins/timm.txt timm.txt
|
||||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt
|
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt
|
||||||
|
|
||||||
# Install XPU Dependencies
|
# Install XPU Dependencies
|
||||||
ARG XPU_VERSION
|
ARG XPU_VERSION
|
||||||
|
|||||||
@ -66,7 +66,6 @@ ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"
|
|||||||
# (optional) Install UCC
|
# (optional) Install UCC
|
||||||
ARG UCX_COMMIT
|
ARG UCX_COMMIT
|
||||||
ARG UCC_COMMIT
|
ARG UCC_COMMIT
|
||||||
ARG CUDA_VERSION
|
|
||||||
ENV UCX_COMMIT $UCX_COMMIT
|
ENV UCX_COMMIT $UCX_COMMIT
|
||||||
ENV UCC_COMMIT $UCC_COMMIT
|
ENV UCC_COMMIT $UCC_COMMIT
|
||||||
ENV UCX_HOME /usr
|
ENV UCX_HOME /usr
|
||||||
@ -97,11 +96,11 @@ RUN rm install_openssl.sh
|
|||||||
ARG INDUCTOR_BENCHMARKS
|
ARG INDUCTOR_BENCHMARKS
|
||||||
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
|
||||||
COPY ./common/common_utils.sh common_utils.sh
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
|
COPY ci_commit_pins/huggingface.txt huggingface.txt
|
||||||
COPY ci_commit_pins/timm.txt timm.txt
|
COPY ci_commit_pins/timm.txt timm.txt
|
||||||
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
COPY ci_commit_pins/torchbench.txt torchbench.txt
|
||||||
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
|
||||||
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
|
RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt
|
||||||
|
|
||||||
ARG TRITON
|
ARG TRITON
|
||||||
ARG TRITON_CPU
|
ARG TRITON_CPU
|
||||||
@ -182,6 +181,7 @@ COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
|
|||||||
RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi
|
RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi
|
||||||
|
|
||||||
# AWS specific CUDA build guidance
|
# AWS specific CUDA build guidance
|
||||||
|
ENV TORCH_CUDA_ARCH_LIST Maxwell
|
||||||
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
|
ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
|
||||||
ENV CUDA_PATH /usr/local/cuda
|
ENV CUDA_PATH /usr/local/cuda
|
||||||
|
|
||||||
|
|||||||
@ -7,4 +7,4 @@ set -ex
|
|||||||
|
|
||||||
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|
||||||
USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh
|
USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh
|
||||||
|
|||||||
@ -1,31 +0,0 @@
|
|||||||
# 🔧 Lumen_cli
|
|
||||||
A Python CLI tool for building and testing PyTorch-based components, using a YAML configuration file for structured, repeatable workflows.
|
|
||||||
|
|
||||||
|
|
||||||
## Features
|
|
||||||
- **Build**
|
|
||||||
- external projects (e.g. vLLM)
|
|
||||||
|
|
||||||
## 📦 Installation
|
|
||||||
Run the following at the root of the pytorch repo:
|
|
||||||
```bash
|
|
||||||
pip install -e .ci/lumen_cli
|
|
||||||
```
|
|
||||||
|
|
||||||
## Run the cli tool
|
|
||||||
The CLI tool must be run from the root of the pytorch repo. For example, to run the external vLLM build:
|
|
||||||
```bash
|
|
||||||
python -m cli.run build external vllm
|
|
||||||
```
|
|
||||||
This runs the build steps for the vllm project with the default behaviour.
|
|
||||||
|
|
||||||
To see the help messages, run:
|
|
||||||
```bash
|
|
||||||
python3 -m cli.run --help
|
|
||||||
```
|
|
||||||
|
|
||||||
## Add customized external build logic
|
|
||||||
To add build logic for a new external target:
|
|
||||||
1. create the build function in the `cli/lib` folder
|
|
||||||
2. register your target and the main build function at EXTERNAL_BUILD_TARGET_DISPATCH in `cli/build_cli/register_build.py`
|
|
||||||
3. [optional] create your ci config file in .github/ci_configs/${EXTERNAL_PACKAGE_NAME}.yaml
|
|
||||||
@ -1,37 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from cli.lib.common.cli_helper import register_targets, RichHelp, TargetSpec
|
|
||||||
from cli.lib.core.vllm.vllm_build import VllmBuildRunner
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Registry of external build targets: maps each target name to its argparse
# configuration and runner class. Every key becomes a subcommand reachable as
# `python -m cli.run build external {target}`.
_TARGETS: dict[str, TargetSpec] = {
    "vllm": {
        "runner": VllmBuildRunner,
        "help": "Build vLLM using docker buildx.",
    },
    # add yours ...
}
|
|
||||||
|
|
||||||
|
|
||||||
def register_build_commands(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``build`` command tree to the top-level CLI parser.

    Creates ``build`` with a required ``build_command`` sub-command, adds the
    ``external`` sub-command under it, and registers every entry of
    ``_TARGETS`` as a target of ``external``.

    Args:
        subparsers: Sub-parser collection of the parent parser that the
            ``build`` command is added to.
    """
    build_parser = subparsers.add_parser(
        "build",
        help="Build related commands",
        formatter_class=RichHelp,
    )
    build_subparsers = build_parser.add_subparsers(dest="build_command", required=True)

    # One line per registered target, shown in the `external` description.
    overview = "\n".join(
        f" {name:12} {spec.get('help', '')}" for name, spec in _TARGETS.items()
    )
    external_parser = build_subparsers.add_parser(
        "external",
        help="Build external targets",
        description="Build third-party targets.\n\nAvailable targets:\n" + overview,
        formatter_class=RichHelp,
    )
    register_targets(external_parser, _TARGETS)
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
"""
|
|
||||||
Cli Argparser Utility helpers for CLI tasks.
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
from abc import ABC, abstractmethod
|
|
||||||
|
|
||||||
|
|
||||||
try:
|
|
||||||
from typing import Any, Callable, Required, TypedDict # Python 3.11+
|
|
||||||
except ImportError:
|
|
||||||
from typing import Any, Callable, TypedDict
|
|
||||||
|
|
||||||
from typing_extensions import Required # Fallback for Python <3.11
|
|
||||||
|
|
||||||
|
|
||||||
class BaseRunner(ABC):
    """Abstract base for CLI runners.

    A runner is constructed with the parsed command-line arguments and does
    its work in ``run``; subclasses must implement ``run``.
    """

    def __init__(self, args: Any) -> None:
        """Store the parsed arguments on ``self.args``."""
        self.args = args

    @abstractmethod
    def run(self) -> None:
        """runs main logics, required"""
|
|
||||||
|
|
||||||
|
|
||||||
# Pretty help: keep newlines + show defaults
|
|
||||||
class RichHelp(
    argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter
):
    """Help formatter combining both argparse bases.

    Shows argument defaults in the help text and keeps the description's
    newlines as written.
    """
|
|
||||||
|
|
||||||
|
|
||||||
class TargetSpec(TypedDict, total=False):
    """CLI subcommand specification for a single target."""

    # Runner class instantiated with the parsed args; the only required key.
    runner: Required[type[BaseRunner]]
    # Short help text for the subcommand.
    help: str
    # Longer description; register_targets falls back to the runner's
    # docstring when this is missing or empty.
    description: str
    # Optional hook that adds target-specific arguments to the sub-parser.
    add_arguments: Callable[[argparse.ArgumentParser], None]
|
|
||||||
|
|
||||||
|
|
||||||
def register_targets(
    parser: argparse.ArgumentParser,
    target_specs: dict[str, TargetSpec],
    common_args: Callable[[argparse.ArgumentParser], None] = lambda _: None,
) -> None:
    """Register target subcommands.

    Args:
        parser: Parser that receives a required ``target`` sub-command.
        target_specs: Mapping of target name to its specification.
        common_args: Hook called on every target sub-parser to add arguments
            shared by all targets; the default adds nothing.
    """
    targets = parser.add_subparsers(
        dest="target",
        required=True,
        metavar="{" + ",".join(target_specs) + "}",
    )

    for name, spec in target_specs.items():
        runner_cls = spec["runner"]
        # Prefer an explicit description, else reuse the runner's docstring.
        desc = spec.get("description") or runner_cls.__doc__ or ""

        p = targets.add_parser(
            name,
            help=spec.get("help", ""),
            description=desc.strip(),
            formatter_class=RichHelp,
        )
        p.set_defaults(
            # Bind the class as a default argument so each sub-parser keeps its
            # own runner instead of the loop's last value.
            func=lambda args, cls=runner_cls: cls(args).run(),
            _runner_class=runner_cls,
        )
        add_arguments = spec.get("add_arguments")
        if callable(add_arguments):
            add_arguments(p)
        if common_args:
            common_args(p)
|
|
||||||
@ -1,42 +0,0 @@
|
|||||||
"""
|
|
||||||
Docker Utility helpers for CLI tasks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
import docker
|
|
||||||
from docker.errors import APIError, NotFound
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# lazy singleton so we don't reconnect every call
|
|
||||||
_docker_client: Optional[docker.DockerClient] = None
|
|
||||||
|
|
||||||
|
|
||||||
def _get_client() -> docker.DockerClient:
    """Return the shared Docker client, creating it on first use.

    The client is built with ``docker.from_env()`` and cached in the
    module-level ``_docker_client`` so later calls reuse it.
    """
    global _docker_client
    if _docker_client is None:
        _docker_client = docker.from_env()
    return _docker_client
|
|
||||||
|
|
||||||
|
|
||||||
def local_image_exists(
    image_name: str, client: Optional[docker.DockerClient] = None
) -> bool:
    """Return True if a local Docker image exists.

    Args:
        image_name: Image reference to look up. An empty value returns False
            without calling Docker.
        client: Docker client to query; defaults to the shared client from
            ``_get_client()``.

    Returns:
        True when ``client.images.get`` succeeds. False when it raises
        ``NotFound`` or ``APIError``; the error is logged.
    """
    if not image_name:
        return False

    client = client or _get_client()
    try:
        client.images.get(image_name)
    except (NotFound, APIError) as e:
        logger.error(
            "Error when checking Docker image '%s': %s",
            image_name,
            # Fall back to the full exception text when no explanation is set.
            getattr(e, "explanation", None) or str(e),
        )
        return False
    return True
|
|
||||||
@ -1,110 +0,0 @@
|
|||||||
"""
|
|
||||||
Environment Variables and Dataclasses Utility helpers for CLI tasks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
from dataclasses import field, fields, is_dataclass, MISSING
|
|
||||||
from pathlib import Path
|
|
||||||
from textwrap import indent
|
|
||||||
from typing import Optional, Union
|
|
||||||
|
|
||||||
from cli.lib.common.utils import str2bool
|
|
||||||
|
|
||||||
|
|
||||||
def get_env(name: str, default: str = "") -> str:
    """Get environment variable with default fallback.

    An unset variable and a variable set to the empty string are treated the
    same: both yield ``default``.
    """
    return os.environ.get(name) or default
|
|
||||||
|
|
||||||
|
|
||||||
def env_path_optional(
    name: str,
    default: Optional[Union[str, Path]] = None,
    resolve: bool = True,
) -> Optional[Path]:
    """Get environment variable as optional Path.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or empty.
        resolve: When True, return ``Path.resolve()`` of the value.

    Returns:
        The path, or None when neither the variable nor ``default`` provides
        a non-empty value.
    """
    val = get_env(name) or default
    if not val:
        return None

    path = Path(val)
    return path.resolve() if resolve else path
|
|
||||||
|
|
||||||
|
|
||||||
def env_path(
    name: str,
    default: Optional[Union[str, Path]] = None,
    resolve: bool = True,
) -> Path:
    """Get environment variable as Path, raise if missing.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is unset or empty.
        resolve: When True, the returned path is resolved.

    Raises:
        ValueError: If neither the variable nor ``default`` yields a path.
    """
    path = env_path_optional(name, default, resolve)
    if not path:
        raise ValueError(f"Missing path value for {name}")
    return path
|
|
||||||
|
|
||||||
|
|
||||||
def env_bool(
    name: str,
    default: bool = False,
) -> bool:
    """Get environment variable as a bool.

    Returns ``default`` when the variable is unset or empty; otherwise the
    value is converted with ``str2bool``.
    """
    val = get_env(name)
    if not val:
        return default
    return str2bool(val)
|
|
||||||
|
|
||||||
|
|
||||||
def env_bool_field(
    name: str,
    default: bool = False,
) -> bool:
    """Dataclass field whose default is read from a boolean env variable.

    The environment is read by the field's ``default_factory``, not when this
    function is called. The return annotation names the field's value type,
    matching the sibling ``env_path_field`` / ``env_str_field`` helpers; the
    object actually returned is the result of ``dataclasses.field``.
    """
    return field(default_factory=lambda: env_bool(name, default))
|
|
||||||
|
|
||||||
|
|
||||||
def env_path_field(
    name: str,
    default: Union[str, Path] = "",
    *,
    resolve: bool = True,
) -> Path:
    """Dataclass field whose default is read from a path env variable.

    The field's ``default_factory`` calls ``env_path``, so it raises
    ``ValueError`` when neither the variable nor ``default`` provides a path.
    The return annotation names the field's value type; the object actually
    returned is the result of ``dataclasses.field``.
    """
    return field(default_factory=lambda: env_path(name, default, resolve=resolve))
|
|
||||||
|
|
||||||
|
|
||||||
def env_str_field(
    name: str,
    default: str = "",
) -> str:
    """Dataclass field whose default is read from a string env variable.

    The field's ``default_factory`` calls ``get_env(name, default)``. The
    return annotation names the field's value type; the object actually
    returned is the result of ``dataclasses.field``.
    """
    return field(default_factory=lambda: get_env(name, default))
|
|
||||||
|
|
||||||
|
|
||||||
def generate_dataclass_help(cls) -> str:
    """Auto-generate help text for dataclass fields.

    Produces one ``name = repr(default)`` line per field. Fields with a
    ``default_factory`` are evaluated by calling the factory; a factory that
    raises is reported as ``<error: ...>`` instead of propagating. Fields with
    no default are shown as ``<required>``.

    Args:
        cls: A dataclass (anything accepted by ``dataclasses.is_dataclass``).

    Returns:
        The field lines joined with newlines and indented.

    Raises:
        TypeError: If ``cls`` is not a dataclass.
    """
    if not is_dataclass(cls):
        raise TypeError(f"{cls} is not a dataclass")

    def get_value(fld):
        if fld.default is not MISSING:
            return fld.default
        if fld.default_factory is not MISSING:
            try:
                return fld.default_factory()
            except Exception as e:
                # Help generation is best-effort: show the failure inline.
                return f"<error: {e}>"
        return "<required>"

    lines = [f"{fld.name:<22} = {get_value(fld)!r}" for fld in fields(cls)]
    return indent("\n".join(lines), " ")
|
|
||||||
|
|
||||||
|
|
||||||
def with_params_help(params_cls: type, title: str = "Parameter defaults"):
    """
    Class decorator that appends a help table generated from another dataclass
    (e.g., VllmParameters) to the decorated class's docstring.

    Args:
        params_cls: Dataclass whose fields are rendered with
            ``generate_dataclass_help``.
        title: Heading placed above the generated block.

    Raises:
        TypeError: If ``params_cls`` is not a dataclass.
    """
    if not is_dataclass(params_cls):
        raise TypeError(f"{params_cls} must be a dataclass")

    def _decorator(cls: type) -> type:
        # The table is built when the decorator is applied to a class.
        block = generate_dataclass_help(params_cls)
        cls.__doc__ = (cls.__doc__ or "") + f"\n\n{title}:\n{block}"
        return cls

    return _decorator
|
|
||||||
@ -1,143 +0,0 @@
|
|||||||
from __future__ import annotations
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import textwrap
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import TYPE_CHECKING
|
|
||||||
|
|
||||||
from cli.lib.common.utils import get_wheels
|
|
||||||
from jinja2 import Template
|
|
||||||
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
|
||||||
from collections.abc import Iterable, Mapping
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Titled section containing one fenced code block.
_TPL_CONTENT = Template(
    textwrap.dedent("""\
        ## {{ title }}

        ```{{ lang }}
        {{ content }}
        ```
    """)
)

# Titled bullet list of `pkg: relpath` entries, with a placeholder when empty.
_TPL_LIST_ITEMS = Template(
    textwrap.dedent("""\
        ## {{ title }}
        {% for it in items %}
        - {{ it.pkg }}: {{ it.relpath }}
        {% else %}
        _(no item found)_
        {% endfor %}
    """)
)

# Markdown table with one column per entry of `cols`; placeholder when no rows.
_TPL_TABLE = Template(
    textwrap.dedent("""\
        {%- if rows %}
        | {{ cols | join(' | ') }} |
        |{%- for _ in cols %} --- |{%- endfor %}
        {%- for r in rows %}
        | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %}
        {%- endfor %}
        {%- else %}
        _(no data)_
        {%- endif %}
    """)
)
|
|
||||||
|
|
||||||
|
|
||||||
def gh_summary_path() -> Path | None:
|
|
||||||
"""Return the Path to the GitHub step summary file, or None if not set."""
|
|
||||||
p = os.environ.get("GITHUB_STEP_SUMMARY")
|
|
||||||
return Path(p) if p else None
|
|
||||||
|
|
||||||
|
|
||||||
def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
    """
    Write Markdown content to the GitHub Step Summary file if GITHUB_STEP_SUMMARY is set.

    Args:
        md: Markdown text; it is dedented and stripped, and a single trailing
            newline is added before writing.
        append_content: default True. If True, append to the end of the file,
            else overwrite the whole file.

    Returns:
        True if written successfully (in GitHub Actions environment),
        False if skipped (e.g., running locally where the variable is not set).
    """
    sp = gh_summary_path()
    if not sp:
        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
        return False

    md_clean = textwrap.dedent(md).strip() + "\n"

    mode = "a" if append_content else "w"
    with sp.open(mode, encoding="utf-8") as f:
        f.write(md_clean)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def md_heading(text: str, level: int = 2) -> str:
    """Generate a Markdown heading string with the given level (1-6).

    Levels outside 1-6 are clamped into that range. The result ends with a
    newline.
    """
    depth = max(1, min(level, 6))
    return f"{'#' * depth} {text}\n"
|
|
||||||
|
|
||||||
|
|
||||||
def md_details(summary: str, content: str) -> str:
    """Generate a collapsible <details> block with a summary and inner content.

    ``summary`` and ``content`` are inserted as given, without escaping.
    """
    return f"<details>\n<summary>{summary}</summary>\n\n{content}\n\n</details>\n"
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_content_from_file(
    output_dir: Path,
    freeze_file: str,
    title: str = "Content from file",
    code_lang: str = "",  # e.g. "text" or "ini"
) -> bool:
    """Write the content of a file to the GitHub step summary as a code block.

    Args:
        output_dir: Directory containing the file.
        freeze_file: File name inside ``output_dir``.
        title: Heading rendered above the code block.
        code_lang: Language tag for the code fence.

    Returns:
        False if the file does not exist; otherwise the result of
        ``write_gh_step_summary``.
    """
    f = Path(output_dir) / freeze_file
    if not f.exists():
        return False
    content = f.read_text(encoding="utf-8").strip()
    md = render_content(content, title=title, lang=code_lang)
    return write_gh_step_summary(md)
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3) -> bool:
    """Write a list of the wheels found under ``path`` to the step summary.

    Args:
        path: Location passed to ``get_wheels``.
        title: Heading rendered above the list.
        max_depth: Forwarded to ``get_wheels`` as ``max_depth``.

    Returns:
        False when ``get_wheels`` returns nothing; otherwise the result of
        ``write_gh_step_summary``.
    """
    items = get_wheels(path, max_depth=max_depth)
    if not items:
        return False
    md = render_list(items, title=title)
    return write_gh_step_summary(md)
|
|
||||||
|
|
||||||
|
|
||||||
def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
    """
    Render a list of dicts as a Markdown table using Jinja template.

    Columns are the union of all row keys, in the order each key is first
    seen, so the same input always produces the same table. Keys missing from
    a row are rendered as empty cells by the template.

    Returns:
        The rendered table, stripped, with one trailing newline.
    """
    rows = list(rows)
    # dict.fromkeys de-duplicates while keeping insertion order; a set would
    # make the column order vary between interpreter runs.
    cols = list(dict.fromkeys(k for r in rows for k in r))
    md = _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
    return md
|
|
||||||
|
|
||||||
|
|
||||||
def render_list(
    items: Iterable[str],
    *,
    title: str = "List",
) -> str:
    """Render ``items`` as a titled Markdown bullet list.

    NOTE(review): the template reads ``it.pkg`` and ``it.relpath`` from every
    item, so items presumably need to be mappings or objects exposing those
    two names rather than plain strings as annotated; confirm against
    ``get_wheels``.
    """
    tpl = _TPL_LIST_ITEMS
    md = tpl.render(title=title, items=items)
    return md
|
|
||||||
|
|
||||||
|
|
||||||
def render_content(
    content: str,
    *,
    title: str = "Content",
    lang: str = "text",
) -> str:
    """Render ``content`` as a titled Markdown section with a fenced code block.

    Args:
        content: Text placed inside the code fence.
        title: Heading text.
        lang: Language tag placed after the opening fence.
    """
    tpl = _TPL_CONTENT
    md = tpl.render(title=title, content=content, lang=lang)
    return md
|
|
||||||
@ -1,69 +0,0 @@
|
|||||||
"""
|
|
||||||
Git Utility helpers for CLI tasks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from cli.lib.common.path_helper import remove_dir
|
|
||||||
from git import GitCommandError, RemoteProgress, Repo
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
class PrintProgress(RemoteProgress):
    """Simple progress logger for git operations."""

    def __init__(self, interval: int = 5):
        """Create the logger.

        Args:
            interval: Log only percentages that are multiples of this value.
                Non-positive values are treated as 1 so the modulo in
                ``update`` cannot divide by zero.
        """
        super().__init__()
        self._last_percent = -1
        self._interval = interval if interval > 0 else 1

    def update(self, op_code, cur, max=None, message=""):
        """Log progress for one update callback.

        With both ``cur`` and ``max`` set, logs a percentage line whenever the
        percentage changes and is a multiple of the interval; otherwise logs
        the current line or ``message`` if there is one.
        """
        # `max` shadows the builtin inside this method; the parameter name is
        # kept so callers passing it by keyword keep working.
        msg = self._cur_line or message
        if max and cur:
            percent = int(cur / max * 100)
            if percent != self._last_percent and percent % self._interval == 0:
                self._last_percent = percent
                logger.info("Progress: %d%% - %s", percent, msg)
        elif msg:
            logger.info(msg)
|
|
||||||
|
|
||||||
|
|
||||||
def clone_external_repo(
    target: str, repo: str, dst: str = "", update_submodules=False
) -> tuple[Repo, str]:
    """Clone repository with pinned commit and optional submodules.

    Args:
        target: Target name; selects the pin file read by
            ``get_post_build_pinned_commit`` and is the default destination.
        repo: URL or path passed to ``Repo.clone_from``.
        dst: Destination directory; defaults to ``target``. Any existing
            directory there is removed first.
        update_submodules: When True, initialise and recursively update the
            repository's submodules after checkout.

    Returns:
        ``(repo_object, commit)``, where ``commit`` is the pinned revision
        that was checked out.

    Raises:
        GitCommandError: Re-raised after logging when a git operation fails.
    """
    dst = dst or target

    try:
        logger.info("Cloning %s to %s", target, dst)

        # Clone and fetch
        remove_dir(dst)
        r = Repo.clone_from(repo, dst, progress=PrintProgress())
        r.git.fetch("--all", "--tags")

        # Checkout pinned commit
        commit = get_post_build_pinned_commit(target)
        logger.info("Checking out pinned %s commit %s", target, commit)
        r.git.checkout(commit)

        # Update submodules if requested
        if update_submodules and r.submodules:
            logger.info("Updating %d submodule(s)", len(r.submodules))
            for sm in r.submodules:
                sm.update(init=True, recursive=True, progress=PrintProgress())

        logger.info("Successfully cloned %s", target)
        return r, commit

    except GitCommandError as e:
        logger.error("Git operation failed: %s", e)
        raise
|
|
||||||
|
|
||||||
|
|
||||||
def get_post_build_pinned_commit(name: str, prefix=".github/ci_commit_pins") -> str:
    """Read the pinned commit for ``name`` from ``<prefix>/<name>.txt``.

    Args:
        name: Pin name; the file read is ``{name}.txt``.
        prefix: Directory holding the pin files. The default is a relative
            path, so it is resolved against the current working directory.

    Returns:
        The file content with surrounding whitespace stripped.

    Raises:
        FileNotFoundError: If the pin file does not exist.
    """
    path = Path(prefix) / f"{name}.txt"
    if not path.exists():
        raise FileNotFoundError(f"Pin file not found: {path}")
    return path.read_text(encoding="utf-8").strip()
|
|
||||||
@ -1,14 +0,0 @@
|
|||||||
"""
|
|
||||||
Logger Utility helpers for CLI tasks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import sys
|
|
||||||
|
|
||||||
|
|
||||||
def setup_logging(level: int = logging.INFO) -> None:
    """Configure root logging to write timestamped records to stdout.

    Args:
        level: Logging level passed to ``logging.basicConfig``.
    """
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        stream=sys.stdout,
    )
|
|
||||||
@ -1,62 +0,0 @@
|
|||||||
"""Path utility helpers for CLI tasks."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import shutil
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Union
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def get_path(path: Union[str, Path], resolve: bool = False) -> Path:
    """Convert to Path object, optionally resolving to absolute path.

    Args:
        path: String or Path to convert.
        resolve: When True, return ``Path.resolve()`` of the result.

    Raises:
        ValueError: If ``path`` is None or an empty string.
    """
    if not path:
        raise ValueError("Path cannot be None or empty")
    result = Path(path)
    return result.resolve() if resolve else result
|
|
||||||
|
|
||||||
|
|
||||||
def ensure_dir_exists(path: Union[str, Path]) -> Path:
    """Create directory if it doesn't exist.

    Missing parent directories are created as well.

    Returns:
        The directory as a Path.
    """
    path_obj = get_path(path)
    path_obj.mkdir(parents=True, exist_ok=True)
    return path_obj
|
|
||||||
|
|
||||||
|
|
||||||
def remove_dir(path: Union[str, Path, None]) -> None:
    """Remove directory if it exists.

    A None or empty ``path`` is ignored. An existing path is deleted with
    ``shutil.rmtree``.
    """
    if not path:
        return
    path_obj = get_path(path)
    if path_obj.exists():
        shutil.rmtree(path_obj)
|
|
||||||
|
|
||||||
|
|
||||||
def force_create_dir(path: Union[str, Path]) -> Path:
    """Remove directory if exists, then create fresh empty directory.

    Returns:
        The newly created directory as a Path.
    """
    remove_dir(path)
    return ensure_dir_exists(path)
|
|
||||||
|
|
||||||
|
|
||||||
def copy(src: Union[str, Path], dst: Union[str, Path]) -> None:
    """Copy a file or a directory tree from ``src`` to ``dst``.

    Both paths are resolved to absolute paths first, and the parent directory
    of ``dst`` is created when missing. Files are copied with
    ``shutil.copy2``; directories with ``shutil.copytree`` (merging into an
    existing destination).

    Raises:
        FileNotFoundError: If ``src`` does not exist.
        ValueError: If ``src`` is neither a regular file nor a directory.
    """
    source = get_path(src, resolve=True)
    destination = get_path(dst, resolve=True)

    if not source.exists():
        raise FileNotFoundError(f"Source does not exist: {source}")

    destination.parent.mkdir(parents=True, exist_ok=True)

    if source.is_file():
        shutil.copy2(source, destination)
        return
    if not source.is_dir():
        raise ValueError(f"Unsupported path type: {source}")
    shutil.copytree(source, destination, dirs_exist_ok=True)
|
|
||||||
|
|
||||||
|
|
||||||
def is_path_exist(path: Union[str, Path, None]) -> bool:
    """Return True only when ``path`` is set and exists on disk."""
    if not path:
        return False
    return get_path(path).exists()
|
|
||||||
@ -1,71 +0,0 @@
|
|||||||
import glob
|
|
||||||
import logging
|
|
||||||
import shlex
|
|
||||||
import shutil
|
|
||||||
import sys
|
|
||||||
from collections.abc import Iterable
|
|
||||||
from importlib.metadata import PackageNotFoundError, version # noqa: UP035
|
|
||||||
from typing import Optional, Union
|
|
||||||
|
|
||||||
from cli.lib.common.utils import run_command
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def pip_install_packages(
    packages: Iterable[str] = (),
    env=None,
    *,
    requirements: Optional[str] = None,
    constraints: Optional[str] = None,
    prefer_uv: bool = False,
) -> None:
    """Install packages with the current interpreter's pip (or ``uv pip``).

    Args:
        packages: Package specifiers appended to the install command.
        env: Extra environment variables forwarded to ``run_command``.
        requirements: Optional requirements file passed with ``-r``.
        constraints: Optional constraints file passed with ``-c``.
        prefer_uv: Use ``python -m uv pip install`` when a ``uv`` executable
            is found on PATH; otherwise plain ``python -m pip install``.
    """
    uv_available = prefer_uv and shutil.which("uv") is not None

    cmd = [sys.executable, "-m"]
    cmd.extend(["uv", "pip", "install"] if uv_available else ["pip", "install"])
    if requirements:
        cmd.extend(["-r", requirements])
    if constraints:
        cmd.extend(["-c", constraints])
    cmd.extend(packages)

    # run_command takes a single string, so quote every token once and reuse it.
    quoted = " ".join(shlex.quote(token) for token in cmd)
    logger.info("pip installing packages: %s", quoted)
    run_command(quoted, env=env)
|
|
||||||
|
|
||||||
|
|
||||||
def pip_install_first_match(pattern: str, extras: Optional[str] = None, pref_uv=False):
    """Install the first wheel matching the glob ``pattern``.

    Args:
        pattern: Glob pattern resolved via ``first_matching_pkg``.
        extras: Optional extras appended as ``wheel[extras]``.
        pref_uv: Forwarded to ``pip_install_packages`` as ``prefer_uv``.
    """
    target = first_matching_pkg(pattern)
    if extras:
        target = f"{target}[{extras}]"
    logger.info("Installing %s...", target)
    pip_install_packages([target], prefer_uv=pref_uv)
|
|
||||||
|
|
||||||
|
|
||||||
def run_python(args: Union[str, list[str]], env=None):
    """
    Run the current interpreter (``sys.executable``) with the given arguments.

    ``args`` may be one shell-style string (split with ``shlex.split``) or a
    list of arguments; ``env`` is forwarded to ``run_command``.
    """
    argv = shlex.split(args) if isinstance(args, str) else args
    command = " ".join(shlex.quote(token) for token in [sys.executable] + argv)
    run_command(command, env=env)
|
|
||||||
|
|
||||||
|
|
||||||
def pkg_exists(name: str) -> bool:
    """Report (and log) whether the distribution ``name`` is installed."""
    try:
        installed_version = version(name)
    except PackageNotFoundError:
        logger.info("%s is not installed", name)
        return False
    logger.info("%s already exist with version: %s", name, installed_version)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def first_matching_pkg(pattern: str) -> str:
    """Return the lexicographically smallest path matching the glob ``pattern``.

    Raises:
        FileNotFoundError: If nothing matches ``pattern``.
    """
    candidates = glob.glob(pattern)
    if candidates:
        # The smallest string is the same element a full sort would put first.
        return min(candidates)
    raise FileNotFoundError(f"No wheel matching: {pattern}")
|
|
||||||
@ -1,139 +0,0 @@
|
|||||||
"""
|
|
||||||
General Utility helpers for CLI tasks.
|
|
||||||
"""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
import os
|
|
||||||
import shlex
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from contextlib import contextmanager
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def run_command(
    cmd: str,
    use_shell: bool = False,
    log_cmd: bool = True,
    cwd: Optional[str] = None,
    env: Optional[dict] = None,
    check: bool = True,
) -> int:
    """Run ``cmd`` as a subprocess and return its exit code.

    Args:
        cmd: Command line as a single string.
        use_shell: Run the string through ``/bin/bash`` when true; otherwise
            split it with ``shlex.split`` and execute it directly.
        log_cmd: Log the command before running it.
        cwd: Working directory for the subprocess.
        env: Variables layered on top of the current ``os.environ``.
        check: Raise on a non-zero exit code when true.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the command
            exits with a non-zero status.
    """
    args = cmd if use_shell else shlex.split(cmd)
    log_prefix = "[shell]" if use_shell else "[cmd]"
    executable = "/bin/bash" if use_shell else None

    if log_cmd:
        logger.info("%s %s", log_prefix, cmd if use_shell else " ".join(args))

    # Caller-supplied variables override the inherited environment.
    merged_env = dict(os.environ)
    merged_env.update(env or {})

    proc = subprocess.run(
        args,
        shell=use_shell,
        executable=executable,
        stdout=sys.stdout,
        stderr=sys.stderr,
        cwd=cwd,
        env=merged_env,
        check=False,
    )

    exit_code = proc.returncode
    if exit_code == 0 or not check:
        return exit_code

    logger.error("%s Command failed (exit %s): %s", log_prefix, exit_code, cmd)
    raise subprocess.CalledProcessError(exit_code, cmd if use_shell else args)
|
|
||||||
|
|
||||||
|
|
||||||
def str2bool(value: Optional[str]) -> bool:
    """Convert an environment-variable style string to a boolean.

    Matching is case-insensitive and ignores surrounding whitespace.

    Args:
        value: Raw string; any falsy value (``None``, ``""``) means False.

    Returns:
        True for "1", "true", "t", "yes", "y", "on", "enable", "enabled",
        "found"; False for "0", "false", "f", "no", "n", "off", "disable",
        "disabled".

    Raises:
        ValueError: If ``value`` is truthy but not a string, or is a string
            outside both accepted sets.
    """
    if not value:
        return False
    if not isinstance(value, str):
        raise ValueError(
            f"Expected a string value for boolean conversion, got {type(value)}"
        )
    value = value.strip().lower()

    true_value_set = {"1", "true", "t", "yes", "y", "on", "enable", "enabled", "found"}
    # "disabled" mirrors "enabled" above; previously only "disable" was accepted.
    false_value_set = {"0", "false", "f", "no", "n", "off", "disable", "disabled"}

    if value in true_value_set:
        return True
    if value in false_value_set:
        return False
    raise ValueError(f"Invalid string value for boolean conversion: {value}")
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
def temp_environ(updates: dict[str, str]):
    """
    Apply ``updates`` to ``os.environ`` for the duration of the block.

    On exit every touched variable is restored to its previous value, or
    removed if it was not set before.

    Args:
        updates: Dict of environment variables to set.
    """
    absent = object()  # marks variables that did not exist beforehand
    saved = {}
    for key in updates:
        saved[key] = os.environ.get(key, absent)
    try:
        os.environ.update(updates)
        yield
    finally:
        for key, previous in saved.items():
            if previous is not absent:
                os.environ[key] = previous  # type: ignore[assignment]
            else:
                os.environ.pop(key, None)
|
|
||||||
|
|
||||||
|
|
||||||
@contextmanager
def working_directory(path: str):
    """
    Run the enclosed block with ``path`` as the current working directory.

    The previous working directory is restored on exit. A falsy ``path``
    leaves the working directory untouched.
    """
    if path:
        origin = os.getcwd()
        try:
            os.chdir(path)
            yield
        finally:
            os.chdir(origin)
    else:
        # No-op context
        yield
|
|
||||||
|
|
||||||
|
|
||||||
def get_wheels(
    output_dir: Path,
    max_depth: Optional[int] = None,
) -> list[dict[str, str]]:
    """Return the wheels found under the given output directory.

    Args:
        output_dir: Directory to scan; a missing directory yields ``[]``.
        max_depth: Deepest sub-directory level to include (0 = only
            ``output_dir`` itself). ``None`` scans the whole tree.

    Returns:
        One ``{"pkg": ..., "relpath": ...}`` dict per ``*.whl`` file, where
        ``pkg`` is the filename up to its first ``-`` and ``relpath`` is the
        path relative to ``output_dir``.
    """
    root = Path(output_dir)
    if not root.exists():
        return []
    items: list[dict[str, str]] = []
    for dirpath, dirnames, filenames in os.walk(root):
        current = Path(dirpath)
        depth = len(current.relative_to(root).parts)
        if max_depth is not None and depth >= max_depth:
            # Nothing deeper can qualify, so stop os.walk from descending.
            dirnames[:] = []
            if depth > max_depth:
                continue
        else:
            # Visit sub-directories in a stable order regardless of the OS.
            dirnames.sort()
        for fname in sorted(filenames):
            if fname.endswith(".whl"):
                pkg = fname.split("-")[0]
                relpath = str((current / fname).relative_to(root))
                items.append({"pkg": pkg, "relpath": relpath})
    return items
|
|
||||||
@ -1,292 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import textwrap
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from cli.lib.common.gh_summary import write_gh_step_summary
|
|
||||||
from cli.lib.common.git_helper import clone_external_repo
|
|
||||||
from cli.lib.common.pip_helper import pip_install_packages
|
|
||||||
from cli.lib.common.utils import run_command, temp_environ, working_directory
|
|
||||||
from jinja2 import Template
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Markdown snippet for the GitHub step summary: always links the vLLM commit
# and, when a torch SHA is supplied, the PyTorch commit as well.
_TPL_VLLM_INFO = Template(
    textwrap.dedent("""\
        ## Vllm against Pytorch CI Test Summary
        **Vllm Commit**: [{{ vllm_commit }}](https://github.com/vllm-project/vllm/commit/{{ vllm_commit }})
        {%- if torch_sha %}
        **Pytorch Commit**: [{{ torch_sha }}](https://github.com/pytorch/pytorch/commit/{{ torch_sha }})
        {%- endif %}
    """)
)
|
|
||||||
|
|
||||||
|
|
||||||
def sample_vllm_test_library():
    """
    Return the built-in vLLM test plans, keyed by plan name.

    Every plan is a dict with a "title", an "id" and a list of shell "steps";
    some plans also carry "env_vars", "package_install", "num_gpus" or
    "parallelism". The plans mimic
    https://github.com/vllm-project/vllm/blob/main/.buildkite/test-pipeline.yaml
    see run_test_plan for how they are executed.
    """
    # TODO(elainewy): Read from yaml file to handle the env and tests for vllm

    def spawn_env():
        # Built per plan so no two plans share one mutable mapping.
        return {"VLLM_WORKER_MULTIPROC_METHOD": "spawn"}

    def pytest_vs(*targets):
        # One "pytest -v -s <target>" command per target, in order.
        return [f"pytest -v -s {target}" for target in targets]

    mantis_pkg = "git+https://github.com/TIGER-AI-Lab/Mantis.git"
    distributed_title = "Distributed Tests (2 GPUs) pytorch 2.8 release failure"
    lora_shard_flags = (
        "--shard-id=$$BUILDKITE_PARALLEL_JOB"
        " --num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT"
        " --ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py"
    )

    return {
        "vllm_basic_correctness_test": {
            "title": "Basic Correctness Test",
            "id": "vllm_basic_correctness_test",
            "env_vars": spawn_env(),
            "steps": pytest_vs(
                "basic_correctness/test_cumem.py",
                "basic_correctness/test_basic_correctness.py",
                "basic_correctness/test_cpu_offload.py",
            ),
        },
        "vllm_basic_models_test": {
            "title": "Basic models test",
            "id": "vllm_basic_models_test",
            "steps": pytest_vs(
                "models/test_transformers.py",
                "models/test_registry.py",
                "models/test_utils.py",
                "models/test_vision.py",
                "models/test_initialization.py",
            ),
        },
        "vllm_entrypoints_test": {
            "title": "Entrypoints Test ",
            "id": "vllm_entrypoints_test",
            "env_vars": spawn_env(),
            "steps": [
                "pytest -v -s entrypoints/llm"
                " --ignore=entrypoints/llm/test_generate.py"
                " --ignore=entrypoints/llm/test_collective_rpc.py",
                "pytest -v -s entrypoints/llm/test_generate.py",
                "pytest -v -s entrypoints/offline_mode",
            ],
        },
        "vllm_regression_test": {
            "title": "Regression Test",
            "id": "vllm_regression_test",
            "package_install": ["modelscope"],
            "steps": pytest_vs("test_regression.py"),
        },
        "vllm_lora_tp_test_distributed": {
            "title": "LoRA TP Test (Distributed)",
            "id": "vllm_lora_tp_test_distributed",
            "env_vars": spawn_env(),
            "num_gpus": 4,
            "steps": [
                f"pytest -v -s -x lora/{test_file}"
                for test_file in (
                    "test_chatglm3_tp.py",
                    "test_llama_tp.py",
                    "test_llm_with_multi_loras.py",
                )
            ],
        },
        "vllm_distributed_test_28_failure_test": {
            "title": distributed_title,
            "id": "vllm_distributed_test_28_failure_test",
            "env_vars": spawn_env(),
            "num_gpus": 4,
            "steps": pytest_vs("distributed/test_sequence_parallel.py"),
        },
        "vllm_lora_28_failure_test": {
            "title": "LoRA pytorch 2.8 failure test",
            "id": "vllm_lora_28_failure_test",
            "steps": ["pytest -v lora/test_quant_model.py"],
        },
        "vllm_multi_model_processor_test": {
            "title": "Multi-Modal Processor Test",
            "id": "vllm_multi_model_processor_test",
            "package_install": [mantis_pkg],
            "steps": [
                "pytest -v -s models/multimodal/processing"
                " --ignore models/multimodal/processing/test_tensor_schema.py",
            ],
        },
        "vllm_multi_model_test_28_failure_test": {
            "title": "Multi-Model Test (Failed 2.8 release)",
            "id": "vllm_multi_model_test_28_failure_test",
            "package_install": [mantis_pkg],
            "steps": pytest_vs(
                "models/multimodal/generation/test_voxtral.py",
                "models/multimodal/pooling",
            ),
        },
        "vllm_pytorch_compilation_unit_tests": {
            "title": "PyTorch Compilation Unit Tests",
            "id": "vllm_pytorch_compilation_unit_tests",
            "steps": pytest_vs(
                "compile/test_pass_manager.py",
                "compile/test_fusion.py",
                "compile/test_fusion_attn.py",
                "compile/test_silu_mul_quant_fusion.py",
                "compile/test_sequence_parallelism.py",
                "compile/test_async_tp.py",
                "compile/test_fusion_all_reduce.py",
                "compile/test_decorator.py",
            ),
        },
        "vllm_languagde_model_test_extended_generation_28_failure_test": {
            "title": "Language Models Test (Extended Generation) 2.8 release failure",
            "id": "vllm_languagde_model_test_extended_generation_28_failure_test",
            "package_install": [
                "--no-build-isolation",
                "git+https://github.com/Dao-AILab/causal-conv1d@v1.5.0.post8",
            ],
            "steps": pytest_vs("models/language/generation/test_mistral.py"),
        },
        "vllm_distributed_test_2_gpu_28_failure_test": {
            "title": distributed_title,
            "id": "vllm_distributed_test_2_gpu_28_failure_test",
            "env_vars": spawn_env(),
            "num_gpus": 4,
            "steps": pytest_vs("distributed/test_sequence_parallel.py"),
        },
        # TODO(elainewy):need to add g6 with 4 gpus to run this test
        "vllm_lora_test": {
            "title": "LoRA Test %N",
            "id": "lora_test",
            "parallelism": 4,
            "steps": [
                "echo '[checking] list sharded lora tests:'",
                f"pytest -q --collect-only lora {lora_shard_flags}",
                "echo '[checking] Done. list lora tests'",
                f"pytest -v -s lora {lora_shard_flags}",
            ],
        },
    }
|
|
||||||
|
|
||||||
|
|
||||||
def check_parallelism(tests: Any, title: str, shard_id: int = 0, num_shards: int = 0):
    """
    Tell whether a test plan is sharded and validate the shard arguments.

    Args:
        tests: Test plan mapping; its optional "parallelism" entry gives the
            expected shard count.
        title: Plan title, used only in error messages.
        shard_id: Shard requested by the caller.
        num_shards: Total shard count supplied by the caller.

    Returns:
        False when the plan's parallelism is 1 or less, True otherwise.

    Raises:
        RuntimeError: For a sharded plan, when ``shard_id`` exceeds
            ``num_shards`` or ``num_shards`` differs from the plan's
            parallelism.
    """
    expected_shards = int(tests.get("parallelism", "0"))
    if expected_shards <= 1:
        return False

    if shard_id > num_shards:
        raise RuntimeError(
            f"Test {title} expects {num_shards} shards, but invalid {shard_id} is provided"
        )
    if num_shards != expected_shards:
        raise RuntimeError(
            f"Test {title} expects {expected_shards} shards, but invalid {num_shards} is provided"
        )
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def run_test_plan(
    test_plan: str,
    test_target: str,
    tests_map: dict[str, Any],
    shard_id: int = 0,
    num_shards: int = 0,
):
    """
    Run every step of the named test plan and fail if any step fails.

    Args:
        test_plan: Key of the plan inside ``tests_map``.
        test_target: Name used in the opening log line.
        tests_map: Mapping of plan name to plan definition.
        shard_id: Shard to run when the plan is sharded.
        num_shards: Total number of shards when the plan is sharded.

    Raises:
        RuntimeError: If the plan is unknown, or if one or more steps exit
            with a non-zero status (all steps are still attempted first).
    """
    logger.info("run %s tests.....", test_target)
    if test_plan not in tests_map:
        raise RuntimeError(
            f"test {test_plan} not found, please add it to test plan pool"
        )
    plan = tests_map[test_plan]
    extra_packages = plan.get("package_install", [])
    title = plan.get("title", "unknown test")

    sharded = check_parallelism(plan, title, shard_id, num_shards)
    if sharded:
        title = title.replace("%N", f"{shard_id}/{num_shards}")

    logger.info("Running tests: %s", title)
    if extra_packages:
        logger.info("Installing packages: %s", extra_packages)
        pip_install_packages(packages=extra_packages, prefer_uv=True)

    workdir = plan.get("working_directory", "tests")
    env_vars = plan.get("env_vars", {})
    with working_directory(workdir), temp_environ(env_vars):
        failed_steps = []
        for raw_step in plan["steps"]:
            logger.info("Running step: %s", raw_step)
            step = raw_step
            if sharded:
                step = replace_buildkite_placeholders(raw_step, shard_id, num_shards)
                logger.info("Running parallel step: %s", step)
            # check=False: keep going so every failing step gets reported.
            if run_command(cmd=step, check=False, use_shell=True) != 0:
                failed_steps.append(step)
            logger.info("Finish running step: %s", step)
        if failed_steps:
            logger.error("Failed tests: %s", failed_steps)
            raise RuntimeError(
                f"{len(failed_steps)} pytest runs failed: {failed_steps}"
            )
        logger.info("Done. All tests passed")
|
|
||||||
|
|
||||||
|
|
||||||
def clone_vllm(dst: str = "vllm"):
    """Clone the vLLM repository (with submodules) into ``dst``.

    Returns:
        The second element of ``clone_external_repo``'s result, which this
        module treats as the checked-out commit.
    """
    repo_url = "https://github.com/vllm-project/vllm.git"
    _repo, commit_sha = clone_external_repo(
        target="vllm",
        repo=repo_url,
        dst=dst,
        update_submodules=True,
    )
    return commit_sha
|
|
||||||
|
|
||||||
|
|
||||||
def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
    """Substitute the Buildkite shard placeholders in ``step``.

    ``$$BUILDKITE_PARALLEL_JOB_COUNT`` becomes ``num_shards`` and
    ``$$BUILDKITE_PARALLEL_JOB`` becomes ``shard_id``.
    """
    # The longer placeholder goes first: the shorter one is a prefix of it.
    substitutions = (
        ("$$BUILDKITE_PARALLEL_JOB_COUNT", str(num_shards)),
        ("$$BUILDKITE_PARALLEL_JOB", str(shard_id)),
    )
    for placeholder, replacement in substitutions:
        step = step.replace(placeholder, replacement)
    return step
|
|
||||||
|
|
||||||
|
|
||||||
def summarize_build_info(vllm_commit: str) -> bool:
    """Render the vLLM/PyTorch commit summary and write it to the GH step summary.

    The PyTorch commit is read from the ``GITHUB_SHA`` environment variable.
    Returns whatever ``write_gh_step_summary`` returns.
    """
    rendered = _TPL_VLLM_INFO.render(
        vllm_commit=vllm_commit,
        torch_sha=os.getenv("GITHUB_SHA"),
    )
    # Normalize to exactly one trailing newline.
    markdown = rendered.strip() + "\n"
    return write_gh_step_summary(markdown)
|
|
||||||
@ -1,285 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import textwrap
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Optional
|
|
||||||
|
|
||||||
from cli.lib.common.cli_helper import BaseRunner
|
|
||||||
from cli.lib.common.docker_helper import local_image_exists
|
|
||||||
from cli.lib.common.envs_helper import (
|
|
||||||
env_bool_field,
|
|
||||||
env_path_field,
|
|
||||||
env_str_field,
|
|
||||||
with_params_help,
|
|
||||||
)
|
|
||||||
from cli.lib.common.gh_summary import (
|
|
||||||
gh_summary_path,
|
|
||||||
summarize_content_from_file,
|
|
||||||
summarize_wheels,
|
|
||||||
)
|
|
||||||
from cli.lib.common.path_helper import (
|
|
||||||
copy,
|
|
||||||
ensure_dir_exists,
|
|
||||||
force_create_dir,
|
|
||||||
get_path,
|
|
||||||
is_path_exist,
|
|
||||||
)
|
|
||||||
from cli.lib.common.utils import run_command
|
|
||||||
from cli.lib.core.vllm.lib import clone_vllm, summarize_build_info
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
# Default path for docker build artifacts
|
|
||||||
_DEFAULT_RESULT_PATH = "./shared"
|
|
||||||
|
|
||||||
# Temp folder inside the vllm workspace; torch wheels are copied here so the docker build can access them
|
|
||||||
_VLLM_TEMP_FOLDER = "tmp"
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class VllmBuildParameters:
    """
    Parameters defining the vllm external input configurations.
    Combine with VllmDockerBuildArgs to define the vllm build environment

    Each field is populated through the env_*_field helpers using the
    environment variable named in its first argument. ``__post_init__``
    validates the resource behind every enabled USE_* flag.
    """

    # USE_TORCH_WHEEL: when true, use local Torch wheels; requires TORCH_WHEELS_PATH.
    # Otherwise docker build pull torch nightly during build
    # TORCH_WHEELS_PATH: directory containing local torch wheels when use_torch_whl is True
    use_torch_whl: bool = env_bool_field("USE_TORCH_WHEEL", True)
    torch_whls_path: Path = env_path_field("TORCH_WHEELS_PATH", "./dist")

    # USE_LOCAL_BASE_IMAGE: when true, use an existing local Docker base image; requires BASE_IMAGE
    # Otherwise, pull dockerfile's default image remotely
    # BASE_IMAGE: name:tag (only needed when use_local_base_image is True)
    use_local_base_image: bool = env_bool_field("USE_LOCAL_BASE_IMAGE", True)
    base_image: str = env_str_field("BASE_IMAGE")

    # USE_LOCAL_DOCKERFILE: when true("1"), use a local Dockerfile; requires DOCKERFILE_PATH.
    # otherwise, use vllm's default dockerfile.torch_nightly for build
    # DOCKERFILE_PATH: path to Dockerfile used when use_local_dockerfile is True"
    use_local_dockerfile: bool = env_bool_field("USE_LOCAL_DOCKERFILE", True)
    dockerfile_path: Path = env_path_field(
        "DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile.tmp_vllm"
    )

    # OUTPUT_DIR: where docker buildx (local exporter) will write artifacts
    output_dir: Path = env_path_field("OUTPUT_DIR", "external/vllm")

    # --- Build args ----------------------------------------------------------
    target_stage: str = env_str_field("TARGET_STAGE", "export-wheels")

    tag_name: str = env_str_field("TAG", "vllm-wheels")

    cuda_version: str = env_str_field("CUDA_VERSION", "12.8.1")

    python_version: str = env_str_field("PYTHON_VERSION", "3.12")

    max_jobs: str = env_str_field("MAX_JOBS", "64")

    sccache_bucket: str = env_str_field("SCCACHE_BUCKET")

    sccache_region: str = env_str_field("SCCACHE_REGION")

    torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9")

    def __post_init__(self):
        """Validate that each enabled USE_* flag has a usable resource.

        Raises:
            ValueError: If an enabled flag's resource is empty or fails its
                check, or if ``output_dir`` is empty.
        """
        # (env flag name, flag value, attribute to validate, validator, error message)
        checks = [
            (
                "USE_TORCH_WHEEL",
                self.use_torch_whl,
                "torch_whls_path",
                is_path_exist,
                "TORCH_WHEELS_PATH is not provided, but USE_TORCH_WHEEL is set to 1",
            ),
            (
                "USE_LOCAL_BASE_IMAGE",
                self.use_local_base_image,
                "base_image",
                local_image_exists,
                f"BASE_IMAGE {self.base_image} does not found, but USE_LOCAL_BASE_IMAGE is set to 1",
            ),
            (
                "USE_LOCAL_DOCKERFILE",
                self.use_local_dockerfile,
                "dockerfile_path",
                is_path_exist,
                " DOCKERFILE_PATH path does not found, but USE_LOCAL_DOCKERFILE is set to 1",
            ),
        ]
        for flag_name, enabled, attr_name, check_func, error_msg in checks:
            if not enabled:
                # Log the flag's name; the old message printed only its boolean value.
                logger.info(
                    "flag %s is not set, skipping %s check", flag_name, attr_name
                )
                continue
            value = getattr(self, attr_name)
            if not value or not check_func(value):
                raise ValueError(error_msg)
        if not self.output_dir:
            raise ValueError("missing required output_dir")
|
|
||||||
|
|
||||||
|
|
||||||
@with_params_help(VllmBuildParameters)
class VllmBuildRunner(BaseRunner):
    """
    Build vLLM using docker buildx.

    Environment variable options:
        "USE_TORCH_WHEEL": "1: use local wheels; 0: pull nightly from pypi",
        "TORCH_WHEELS_PATH": "Path to local wheels (when USE_TORCH_WHEEL=1)",

        "USE_LOCAL_BASE_IMAGE": "1: use local base image; 0: default image",
        "BASE_IMAGE": "name:tag to indicate base image the dockerfile depends on (when USE_LOCAL_BASE_IMAGE=1)",

        "USE_LOCAL_DOCKERFILE": "1: use local Dockerfile; 0: vllm repo default dockerfile.torch_nightly",
        "DOCKERFILE_PATH": "Path to Dockerfile (when USE_LOCAL_DOCKERFILE=1)",

        "OUTPUT_DIR": "e.g. './shared'",

        "TORCH_CUDA_ARCH_LIST": "e.g. '8.0' or '8.0;9.0'",
        "CUDA_VERSION": "e.g. '12.8.1'",
        "PYTHON_VERSION": "e.g. '3.12'",
        "MAX_JOBS": "e.g. '64'",
        "SCCACHE_BUCKET": "e.g. 'my-bucket'",
        "SCCACHE_REGION": "e.g. 'us-west-2'",
    """

    def __init__(self, args=None):
        # Directory the vllm repo is cloned into and the docker build runs from.
        self.work_directory = "vllm"

    def run(self):
        """
        main function to run vllm build
        1. prepare vllm build environment
        2. prepare the docker build command args
        3. run docker build

        The GitHub step summary is generated even when the build fails.
        """
        inputs = VllmBuildParameters()
        logger.info("Running vllm build with inputs: %s", inputs)
        vllm_commit = clone_vllm()

        self.cp_dockerfile_if_exist(inputs)
        # cp torch wheels from root direct to vllm workspace if exist
        self.cp_torch_whls_if_exist(inputs)

        # make sure the output dir to store the build artifacts exist
        ensure_dir_exists(Path(inputs.output_dir))

        cmd = self._generate_docker_build_cmd(inputs)
        logger.info("Running docker build: \n %s", cmd)

        try:
            # Build from the workspace the helpers above copied files into,
            # rather than a second hard-coded "vllm" literal.
            run_command(cmd, cwd=self.work_directory, env=os.environ.copy())
        finally:
            self.genearte_vllm_build_summary(vllm_commit, inputs)

    def genearte_vllm_build_summary(
        self, vllm_commit: str, inputs: VllmBuildParameters
    ):
        """Write build info and wheel listings to the GitHub step summary.

        Does nothing when ``gh_summary_path()`` reports no summary target.
        """
        if not gh_summary_path():
            return logger.info("Skipping, not detect GH Summary env var....")
        logger.info("Generate GH Summary ...")
        # summarize vllm build info
        summarize_build_info(vllm_commit)

        # summarize vllm build artifacts
        vllm_artifact_dir = inputs.output_dir / "wheels"
        summarize_content_from_file(
            vllm_artifact_dir,
            "build_summary.txt",
            title="Vllm build env pip package summary",
        )
        summarize_wheels(
            inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
        )
        summarize_wheels(vllm_artifact_dir, max_depth=3, title="Vllm Wheels Artifacts")

    def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
        """Copy local torch wheels into a fresh temp folder in the vllm workspace.

        Returns:
            The temp folder path, or "" when ``use_torch_whl`` is off.
        """
        if not inputs.use_torch_whl:
            return ""
        tmp_dir = f"./{self.work_directory}/{_VLLM_TEMP_FOLDER}"
        tmp_path = Path(tmp_dir)
        force_create_dir(tmp_path)
        copy(inputs.torch_whls_path, tmp_dir)
        return tmp_dir

    def cp_dockerfile_if_exist(self, inputs: VllmBuildParameters):
        """Overwrite the workspace's docker/Dockerfile.nightly_torch with the local Dockerfile.

        Skipped when ``use_local_dockerfile`` is off.
        """
        if not inputs.use_local_dockerfile:
            logger.info("using vllm default dockerfile.torch_nightly for build")
            return
        dockerfile_path = get_path(inputs.dockerfile_path, resolve=True)
        vllm_torch_dockerfile = Path(
            f"./{self.work_directory}/docker/Dockerfile.nightly_torch"
        )
        copy(dockerfile_path, vllm_torch_dockerfile)

    def get_result_path(self, path):
        """
        Get the absolute path of the result path

        Falls back to the default result path when ``path`` is empty.
        """
        if not path:
            path = _DEFAULT_RESULT_PATH
        abs_path = get_path(path, resolve=True)
        return abs_path

    def _get_torch_wheel_path_arg(self, torch_whl_dir: Optional[Path]) -> str:
        """Return the TORCH_WHEELS_PATH build arg, or "" when no wheel dir is given.

        The arg always points at the temp folder inside the build context, not
        at ``torch_whl_dir`` itself.
        """
        if not torch_whl_dir:
            return ""
        return f"--build-arg TORCH_WHEELS_PATH={_VLLM_TEMP_FOLDER}"

    def _get_base_image_args(self, inputs: VllmBuildParameters) -> tuple[str, str, str]:
        """
        Returns:
            - base_image_arg: docker buildx arg string for base image
            - final_base_image_arg: docker buildx arg string for vllm-base stage
            - pull_flag: "--pull=false" when the image exists locally, otherwise ""

        All three are "" when ``use_local_base_image`` is off.
        """
        if not inputs.use_local_base_image:
            return "", "", ""

        base_image = inputs.base_image

        # set both base image and final base image to the same local image
        base_image_arg = f"--build-arg BUILD_BASE_IMAGE={base_image}"
        final_base_image_arg = f"--build-arg FINAL_BASE_IMAGE={base_image}"

        if local_image_exists(base_image):
            pull_flag = "--pull=false"
            return base_image_arg, final_base_image_arg, pull_flag
        # Pass the image name itself; the previous `{base_image}` argument was a
        # set literal and logged as "{'name:tag'}".
        logger.info(
            "[INFO] Local image not found:%s will try to pull from remote", base_image
        )
        return base_image_arg, final_base_image_arg, ""

    def _generate_docker_build_cmd(
        self,
        inputs: VllmBuildParameters,
    ) -> str:
        """Assemble the ``docker buildx build`` command line as a single string.

        Optional arguments that are disabled expand to empty strings, leaving
        only extra whitespace in the command.
        """
        base_image_arg, final_base_image_arg, pull_flag = self._get_base_image_args(
            inputs
        )
        # NOTE(review): this passes torch_whls_path regardless of use_torch_whl, so
        # the TORCH_WHEELS_PATH build arg is emitted whenever the path is set —
        # confirm that is intended when USE_TORCH_WHEEL is off.
        torch_arg = self._get_torch_wheel_path_arg(inputs.torch_whls_path)

        return textwrap.dedent(
            f"""
            docker buildx build \
                --output type=local,dest={inputs.output_dir} \
                -f docker/Dockerfile.nightly_torch \
                {pull_flag} \
                {torch_arg} \
                {base_image_arg} \
                {final_base_image_arg} \
                --build-arg max_jobs={inputs.max_jobs} \
                --build-arg CUDA_VERSION={inputs.cuda_version} \
                --build-arg PYTHON_VERSION={inputs.python_version} \
                --build-arg USE_SCCACHE={int(bool(inputs.sccache_bucket and inputs.sccache_region))} \
                --build-arg SCCACHE_BUCKET_NAME={inputs.sccache_bucket} \
                --build-arg SCCACHE_REGION_NAME={inputs.sccache_region} \
                --build-arg torch_cuda_arch_list='{inputs.torch_cuda_arch_list}' \
                --target {inputs.target_stage} \
                -t {inputs.tag_name} \
                --progress=plain .
        """
        ).strip()
|
|
||||||
@ -1,269 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import subprocess
|
|
||||||
import sys
|
|
||||||
from collections.abc import Iterable
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from enum import Enum
|
|
||||||
from pathlib import Path
|
|
||||||
from typing import Any
|
|
||||||
|
|
||||||
from cli.lib.common.cli_helper import BaseRunner
|
|
||||||
from cli.lib.common.envs_helper import env_path_field, env_str_field, get_env
|
|
||||||
from cli.lib.common.path_helper import copy, remove_dir
|
|
||||||
from cli.lib.common.pip_helper import (
|
|
||||||
pip_install_first_match,
|
|
||||||
pip_install_packages,
|
|
||||||
pkg_exists,
|
|
||||||
run_python,
|
|
||||||
)
|
|
||||||
from cli.lib.common.utils import run_command, working_directory
|
|
||||||
from cli.lib.core.vllm.lib import clone_vllm, run_test_plan, sample_vllm_test_library
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class VllmTestParameters:
    """
    Inputs for the vllm external test, populated through the env field helpers.

    !!!DO NOT ADD SECRETS IN THIS CLASS!!!
    If needed, keep only the *name* of an environment variable here (when it
    differs from the secret's own name) and read the secret value straight
    from the environment at runtime.
    """

    # Directory holding the locally built torch wheels.
    torch_whls_path: Path = env_path_field("WHEELS_PATH", "./dist")

    # Directory holding the wheels produced by the vllm external build.
    vllm_whls_path: Path = env_path_field(
        "VLLM_WHEELS_PATH", "./dist/external/vllm/wheels"
    )

    torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9")

    def __post_init__(self):
        # Both wheel directories must exist; torch is checked first.
        for attr in ("torch_whls_path", "vllm_whls_path"):
            if not getattr(self, attr).exists():
                raise ValueError(f"missing {attr}")
|
|
||||||
|
|
||||||
|
|
||||||
class TestInpuType(Enum):
    """Kind of test input handed to the vllm test runner."""

    # A named, pre-defined test plan was supplied.
    TEST_PLAN = "test_plan"
    # Nothing recognisable was supplied.
    UNKNOWN = "unknown"
|
|
||||||
|
|
||||||
|
|
||||||
# Runner that prepares an environment from local torch/vllm wheels and then
# executes a vllm test plan. (Kept as a comment rather than a class docstring
# so the class's __doc__ stays unset.)
class VllmTestRunner(BaseRunner):
    def __init__(self, args: Any):
        self.work_directory = "vllm"
        self.shard_id = args.shard_id
        self.num_shards = args.num_shards

        # A non-empty test plan selects TEST_PLAN mode; otherwise the type
        # stays UNKNOWN and run() rejects it.
        plan = args.test_plan
        self.test_plan = plan if plan else ""
        self.test_type = TestInpuType.TEST_PLAN if plan else TestInpuType.UNKNOWN

        # Matches the structure of the artifacts.zip from the torch build
        self.TORCH_WHL_PATH_REGEX = "torch*.whl"
        self.TORCH_WHL_EXTRA = "opt-einsum"
        self.TORCH_ADDITIONAL_WHLS_REGEX = [
            "vision/torchvision*.whl",
            "audio/torchaudio*.whl",
        ]

        # Matches the structure of the artifacts.zip from the vllm external build
        self.VLLM_TEST_WHLS_REGEX = [
            "xformers/*.whl",
            "vllm/vllm*.whl",
            "flashinfer-python/flashinfer*.whl",
        ]

    def prepare(self):
        """
        Set up the vllm test environment: export env vars, clone the vllm
        repo, then install the wheels and test dependencies from inside it.
        """
        test_params = VllmTestParameters()
        logger.info("Display VllmTestParameters %s", test_params)
        self._set_envs(test_params)

        clone_vllm(dst=self.work_directory)
        with working_directory(self.work_directory):
            # Drop the "vllm" directory inside the checkout before installing.
            remove_dir(Path("vllm"))
            self._install_wheels(test_params)
            self._install_dependencies()
        # verify the torch packages were not overridden by test dependencies
        check_versions()

    def run(self):
        """
        Entry point: prepare the environment, then execute the test plan.
        """
        self.prepare()
        try:
            with working_directory(self.work_directory):
                if self.test_type != TestInpuType.TEST_PLAN:
                    raise ValueError(f"Unknown test type {self.test_type}")
                # Shard arguments are only forwarded when sharding is in use.
                shard_args = (
                    (self.shard_id, self.num_shards) if self.num_shards > 1 else ()
                )
                run_test_plan(
                    self.test_plan,
                    "vllm",
                    sample_vllm_test_library(),
                    *shard_args,
                )
        finally:
            # double check the torch packages were not overridden by other packages
            check_versions()

    def _install_wheels(self, params: VllmTestParameters):
        logger.info("Running vllm test with inputs: %s", params)
        torch_root = str(params.torch_whls_path)
        vllm_root = str(params.vllm_whls_path)

        if not pkg_exists("torch"):
            # torch is not installed yet: take it from the local wheels.
            pip_install_first_match(
                f"{torch_root}/{self.TORCH_WHL_PATH_REGEX}", self.TORCH_WHL_EXTRA
            )

        for pattern in self.TORCH_ADDITIONAL_WHLS_REGEX:
            pip_install_first_match(f"{torch_root}/{pattern}")
        logger.info("Done. Installed torch and other torch-related wheels ")

        logger.info("Installing vllm wheels")
        for pattern in self.VLLM_TEST_WHLS_REGEX:
            pip_install_first_match(f"{vllm_root}/{pattern}")
        logger.info("Done. Installed vllm wheels")

    def _install_test_dependencies(self):
        """
        Rewrite requirements/test.in of the vllm repo so its torch entries
        point at the local torch wheels, compile it into test.txt at
        runtime, and install the result.
        """
        logger.info("generate test.txt from requirements/test.in with local torch whls")
        preprocess_test_in()
        copy("requirements/test.txt", "snapshot_constraint.txt")

        compile_cmd = " ".join(
            [
                f"{sys.executable} -m uv pip compile requirements/test.in",
                "-o test.txt",
                "--index-strategy unsafe-best-match",
                "--constraint snapshot_constraint.txt",
                "--torch-backend cu128",
            ]
        )
        run_command(compile_cmd)
        pip_install_packages(requirements="test.txt", prefer_uv=True)
        logger.info("Done. installed requirements for test dependencies")

    def _install_dependencies(self):
        pip_install_packages(packages=["-e", "tests/vllm_test_utils"], prefer_uv=True)
        pip_install_packages(packages=["hf_transfer"], prefer_uv=True)
        os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

        # script from the vllm repo that removes all torch packages from the requirements txt
        run_python("use_existing_torch.py")

        # common packages first, test packages afterwards
        for req_file in ("requirements/common.txt", "requirements/build.txt"):
            pip_install_packages(requirements=req_file, prefer_uv=True)
        self._install_test_dependencies()

    def _set_envs(self, inputs: VllmTestParameters):
        os.environ["TORCH_CUDA_ARCH_LIST"] = inputs.torch_cuda_arch_list
        arch_is_supported = validate_cuda(get_env("TORCH_CUDA_ARCH_LIST"))
        if not arch_is_supported:
            logger.warning(
                "Missing supported TORCH_CUDA_ARCH_LIST. "
                "Currently support TORCH_CUDA_ARCH_LIST env var "
                "with supported arch [8.0, 8.9, 9.0]"
            )

        # The token is exported as HF_TOKEN from its CI-specific variable.
        os.environ["HF_TOKEN"] = os.getenv("VLLM_TEST_HUGGING_FACE_TOKEN", "")
        if not get_env("HF_TOKEN"):
            raise ValueError(
                "missing required HF_TOKEN, please set VLLM_TEST_HUGGING_FACE_TOKEN env var"
            )
        if not get_env("TORCH_CUDA_ARCH_LIST"):
            raise ValueError(
                "missing required TORCH_CUDA_ARCH_LIST, please set TORCH_CUDA_ARCH_LIST env var"
            )
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess_test_in(
    target_file: str = "requirements/test.in", additional_packages: Iterable[str] = ()
):
    """
    Rewrite ``target_file`` in place so it refers to the locally installed torch wheels.

    Requirement lines that start with one of the packages to remove
    (torch, torchvision, torchaudio, xformers, mamba_ssm, plus
    ``additional_packages``) followed by ``==``, ``@`` or ``>=`` are dropped.
    The ``<name> @ file://<LOCAL_PATH>`` lines that ``pip freeze`` reports
    for torch/torchvision/torchaudio are then written at the top of the
    file, followed by a blank line and the remaining requirements.

    Args:
        target_file: requirements file to rewrite, relative to the current
            working directory.
        additional_packages: extra package names to strip from the file.
            Names are matched literally and case-insensitively.

    Raises:
        subprocess.CalledProcessError: if ``pip freeze`` exits non-zero.
    """
    pkgs_to_remove = [
        "torch",
        "torchvision",
        "torchaudio",
        "xformers",
        "mamba_ssm",
        *(additional_packages or ()),
    ]

    # Read current requirements
    target_path = Path(target_file)
    lines = target_path.read_text().splitlines()

    # Escape each name so characters such as "." in a package name are
    # matched literally instead of being interpreted as regex syntax.
    names = "|".join(re.escape(pkg) for pkg in pkgs_to_remove)
    removal_pattern = re.compile(rf"^({names})\s*(==|@|>=)", re.IGNORECASE)
    kept_lines = [line for line in lines if not removal_pattern.match(line)]

    # Get locally installed torch/vision/audio from pip freeze.
    # This is hacky, but it works.
    pip_freeze = subprocess.check_output(["pip", "freeze"], text=True)
    local_whl_pattern = re.compile(
        r"^(torch|torchvision|torchaudio)\s*@\s*file://", re.IGNORECASE
    )
    header_lines = [
        line for line in pip_freeze.splitlines() if local_whl_pattern.match(line)
    ]

    # Write back: header_lines + blank + kept_lines
    out = "\n".join(header_lines + [""] + kept_lines) + "\n"
    target_path.write_text(out)
    logger.info("[INFO] Updated %s", target_file)
|
|
||||||
|
|
||||||
|
|
||||||
def validate_cuda(value: str) -> bool:
    """
    Return True when every arch in ``value`` is one of the supported ones.

    ``value`` is a TORCH_CUDA_ARCH_LIST-style string whose entries may be
    separated by whitespace or semicolons (e.g. ``"8.0 9.0"`` or
    ``"8.0;9.0"``). An empty or blank value lists no supported arch and is
    therefore reported as invalid.
    """
    VALID_VALUES = {"8.0", "8.9", "9.0"}
    # Normalise ';' separators to spaces so both list styles split alike.
    archs = (value or "").replace(";", " ").split()
    return bool(archs) and all(arch in VALID_VALUES for arch in archs)
|
|
||||||
|
|
||||||
|
|
||||||
def check_versions():
    """
    Call ``pkg_exists`` for each torch/vllm-related package, bracketed by
    start and end log lines. The return values are not inspected here.
    """
    logger.info("Double check installed packages")
    for package in ("torch", "xformers", "torchvision", "torchaudio", "vllm"):
        pkg_exists(package)
    logger.info("Done. checked installed packages")
|
|
||||||
@ -1,40 +0,0 @@
|
|||||||
# main.py
|
|
||||||
|
|
||||||
import argparse
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from cli.build_cli.register_build import register_build_commands
|
|
||||||
from cli.lib.common.logger import setup_logging
|
|
||||||
from cli.test_cli.register_test import register_test_commands
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Build the top-level parser, register the subcommands and dispatch."""
    parser = argparse.ArgumentParser(description="Lumos CLI")
    subparsers = parser.add_subparsers(dest="command", required=True)
    parser.add_argument(
        "--log-level", default="INFO", help="Log level (DEBUG, INFO, WARNING, ERROR)"
    )

    # second-level subcommands: build first, then test
    register_build_commands(subparsers)
    register_test_commands(subparsers)

    # parsing happens only once every option has been registered
    args = parser.parse_args()

    # global logging; an unrecognised level name falls back to INFO
    level = getattr(logging, args.log_level.upper(), logging.INFO)
    setup_logging(level)
    logger.debug("Parsed args: %s", args)

    # No handler was attached to the namespace: show help instead.
    if not hasattr(args, "func"):
        parser.print_help()
        return
    args.func(args)


if __name__ == "__main__":
    main()
|
|
||||||
@ -1,62 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import logging
|
|
||||||
|
|
||||||
from cli.lib.common.cli_helper import register_targets, RichHelp, TargetSpec
|
|
||||||
from cli.lib.core.vllm.vllm_test import VllmTestRunner
|
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
# Maps targets to their argparse configuration and runner
|
|
||||||
# each entry adds a new target under: python -m cli.run test external {target}, handled by its test runner
|
|
||||||
_TARGETS: dict[str, TargetSpec] = {
|
|
||||||
"vllm": {
|
|
||||||
"runner": VllmTestRunner,
|
|
||||||
"help": "test vLLM with pytorch main",
|
|
||||||
}
|
|
||||||
# add yours ...
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def common_args(parser: argparse.ArgumentParser) -> None:
    """
    Add common CLI arguments to the given parser.

    Registers ``--shard-id`` and ``--num-shards`` (both integers, default 1)
    and a required mutually exclusive group that currently holds only
    ``-tp/--test-plan``.
    """
    parser.add_argument(
        "--shard-id",
        type=int,
        default=1,
        # The value is parsed as one int, so the example must be a single
        # id rather than a comma-separated list.
        help="a single shard id (integer) to run, e.g. '1'",
    )
    parser.add_argument(
        "--num-shards",
        type=int,
        default=1,
        help="a number of shards to run, e.g. '4'",
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-tp",
        "--test-plan",
        type=str,
        help="a pre-defined test plan to run, e.g. 'basic_correctness_test'",
    )
|
|
||||||
|
|
||||||
|
|
||||||
def register_test_commands(subparsers: argparse._SubParsersAction) -> None:
    """
    Attach the ``test`` command and its ``external`` subcommand to
    ``subparsers``, then register every entry of ``_TARGETS`` under it.
    """
    test_parser = subparsers.add_parser(
        "test",
        help="test related commands",
        formatter_class=RichHelp,
    )
    test_subparsers = test_parser.add_subparsers(dest="test_command", required=True)

    # One "<name> <help>" row per target, listed in the description.
    rows = [f"  {name:12} {spec.get('help', '')}" for name, spec in _TARGETS.items()]
    overview = "\n".join(rows)

    external_parser = test_subparsers.add_parser(
        "external",
        help="Test external targets",
        description="Test third-party targets.\n\nAvailable targets:\n" + overview,
        formatter_class=RichHelp,
    )
    register_targets(external_parser, _TARGETS, common_args=common_args)
|
|
||||||
@ -1,23 +0,0 @@
|
|||||||
[project]
|
|
||||||
name = "lumen-ci"
|
|
||||||
version = "0.1.0"
|
|
||||||
dependencies = [
|
|
||||||
"pyyaml==6.0.2",
|
|
||||||
"GitPython==3.1.45",
|
|
||||||
"docker==7.1.0",
|
|
||||||
"pytest==7.3.2",
|
|
||||||
"uv==0.8.6"
|
|
||||||
]
|
|
||||||
|
|
||||||
[tool.setuptools]
|
|
||||||
packages = ["cli"]
|
|
||||||
|
|
||||||
[tool.setuptools.package-dir]
|
|
||||||
cli = "cli"
|
|
||||||
|
|
||||||
[tool.ruff.lint]
|
|
||||||
# Enable preview mode for linting
|
|
||||||
preview = true
|
|
||||||
|
|
||||||
# Now you can select your preview rules, like RUF048
|
|
||||||
extend-select = ["RUF048"]
|
|
||||||
@ -1,47 +0,0 @@
|
|||||||
# tests/test_cli.py
|
|
||||||
import io
|
|
||||||
import sys
|
|
||||||
import unittest
|
|
||||||
from contextlib import redirect_stderr, redirect_stdout
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
from cli.run import main
|
|
||||||
|
|
||||||
|
|
||||||
class TestArgparseCLI(unittest.TestCase):
    """End-to-end checks of the argparse wiring behind ``cli.run.main``."""

    @patch("cli.build_cli.register_build.VllmBuildRunner.run", return_value=None)
    @patch("cli.build_cli.register_build.VllmBuildRunner.__init__", return_value=None)
    def test_cli_run_build_external(self, mock_init, mock_run):
        from cli.run import main  # import after patches if needed

        argv = ["cli.run", "build", "external", "vllm"]
        with patch.object(sys, "argv", argv):
            # argparse may call sys.exit on error; swallow it so the
            # assertions below report the failure instead
            try:
                main()
            except SystemExit:
                pass
        # the runner was constructed once and run() was called without args
        mock_init.assert_called_once()
        mock_run.assert_called_once_with()

    def test_build_help(self):
        argv = ["cli.run", "build", "--help"]
        out_buf, err_buf = io.StringIO(), io.StringIO()

        with patch.object(sys, "argv", argv):
            # --help always raises SystemExit(0)
            with self.assertRaises(SystemExit) as raised:
                with redirect_stdout(out_buf), redirect_stderr(err_buf):
                    main()

        self.assertEqual(raised.exception.code, 0)

        help_output = out_buf.getvalue()
        self.assertIn("usage", help_output)
        self.assertIn("external", help_output)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@ -1,115 +0,0 @@
|
|||||||
import argparse
|
|
||||||
import io
|
|
||||||
import unittest
|
|
||||||
from contextlib import redirect_stderr
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
from cli.lib.common.cli_helper import BaseRunner, register_targets, RichHelp, TargetSpec
|
|
||||||
|
|
||||||
|
|
||||||
# ---- Dummy runners for unittests----
|
|
||||||
class FooRunner(BaseRunner):
    """Foo description from docstring."""

    # The docstring above is asserted verbatim by the help-text test.
    def run(self) -> None:
        # no-op placeholder; tests replace it with a mock
        return None
|
|
||||||
|
|
||||||
|
|
||||||
class BarRunner(BaseRunner):
    # Deliberately has no docstring.
    def run(self) -> None:
        # no-op placeholder; tests replace it with a mock
        return None
|
|
||||||
|
|
||||||
|
|
||||||
def add_foo_args(p: argparse.ArgumentParser) -> None:
    """Register the required integer option ``--x`` on ``p``."""
    option = {"type": int, "required": True, "help": "x value"}
    p.add_argument("--x", **option)
|
|
||||||
|
|
||||||
|
|
||||||
def common_args(p: argparse.ArgumentParser) -> None:
    """Register the boolean ``--verbose`` switch on ``p``."""
    option = {"action": "store_true", "help": "verbose flag"}
    p.add_argument("--verbose", **option)
|
|
||||||
|
|
||||||
|
|
||||||
def build_parser(specs: dict[str, TargetSpec]) -> argparse.ArgumentParser:
    """Return an ``app`` parser with ``specs`` registered through ``register_targets``."""
    app_parser = argparse.ArgumentParser(prog="app", formatter_class=RichHelp)
    register_targets(parser=app_parser, target_specs=specs, common_args=common_args)
    return app_parser
|
|
||||||
|
|
||||||
|
|
||||||
def get_subparser(
    parser: argparse.ArgumentParser, name: str
) -> argparse.ArgumentParser:
    """Return the sub-parser registered as ``name`` on ``parser``."""
    actions = parser._subparsers._group_actions  # type: ignore[attr-defined]
    # First sub-parsers action of the group; StopIteration if there is none.
    subparsers_action = next(
        filter(lambda a: isinstance(a, argparse._SubParsersAction), actions)
    )
    return subparsers_action.choices[name]
|
|
||||||
|
|
||||||
|
|
||||||
class TestRegisterTargets(unittest.TestCase):
    """Behaviour of ``register_targets`` as seen through the resulting parser."""

    def test_metavar_lists_targets(self):
        target_specs: dict[str, TargetSpec] = {
            "foo": {"runner": FooRunner, "add_arguments": add_foo_args},
            "bar": {"runner": BarRunner},
        }
        parser = build_parser(target_specs)
        actions = parser._subparsers._group_actions  # type: ignore[attr-defined]
        subparsers_action = next(
            filter(lambda a: isinstance(a, argparse._SubParsersAction), actions)
        )
        self.assertEqual(subparsers_action.metavar, "{foo,bar}")

    def test_add_arguments_and_common_args_present(self):
        target_specs: dict[str, TargetSpec] = {
            "foo": {"runner": FooRunner, "add_arguments": add_foo_args},
        }
        foo_parser = get_subparser(build_parser(target_specs), "foo")
        rendered = foo_parser.format_help()
        # both the target-specific and the common option show up
        for flag in ("--x", "--verbose"):
            self.assertIn(flag, rendered)

    def test_runner_constructed_with_ns_and_run_called(self):
        target_specs: dict[str, TargetSpec] = {
            "foo": {"runner": FooRunner, "add_arguments": add_foo_args},
        }
        parser = build_parser(target_specs)

        with (
            patch.object(FooRunner, "__init__", return_value=None) as mock_init,
            patch.object(FooRunner, "run", return_value=None) as mock_run,
        ):
            namespace = parser.parse_args(["foo", "--x", "3", "--verbose"])
            namespace.func(namespace)  # set by register_targets
            # __init__ received the Namespace
            self.assertEqual(mock_init.call_count, 1)
            positional, _ = mock_init.call_args
            (received,) = positional
            self.assertIsInstance(received, argparse.Namespace)
            # run() called with no args
            mock_run.assert_called_once_with()

    def test_runner_docstring_used_as_description_when_missing(self):
        target_specs: dict[str, TargetSpec] = {
            "foo": {"runner": FooRunner, "add_arguments": add_foo_args},
        }
        foo_parser = get_subparser(build_parser(target_specs), "foo")
        self.assertIn("Foo description from docstring.", foo_parser.format_help())

    def test_missing_target_raises_systemexit_with_usage(self):
        target_specs: dict[str, TargetSpec] = {"foo": {"runner": FooRunner}}
        parser = build_parser(target_specs)
        captured = io.StringIO()
        with self.assertRaises(SystemExit), redirect_stderr(captured):
            parser.parse_args([])
        self.assertIn("usage:", captured.getvalue())
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@ -1,75 +0,0 @@
|
|||||||
import unittest
|
|
||||||
from unittest import mock
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
import docker.errors as derr
|
|
||||||
from cli.lib.common.docker_helper import _get_client, local_image_exists
|
|
||||||
|
|
||||||
|
|
||||||
class TestDockerImageHelpers(unittest.TestCase):
    """Tests for ``local_image_exists`` and the cached ``_get_client``."""

    def setUp(self):
        # Reset the singleton in the target module for every test
        singleton_patch = mock.patch("cli.lib.common.docker_helper._docker_client", None)
        self.addCleanup(singleton_patch.stop)
        singleton_patch.start()

    def test_local_image_exists_true(self):
        # images.get returns an object (no exception) -> image is present
        fake_client = MagicMock()
        fake_client.images.get.return_value = object()
        self.assertTrue(local_image_exists("repo:tag", client=fake_client))

    def test_local_image_exists_not_found_false(self):
        # docker.errors.NotFound -> image is absent
        fake_client = MagicMock()
        fake_client.images.get.side_effect = derr.NotFound("nope")
        self.assertFalse(local_image_exists("missing:latest", client=fake_client))

    def test_local_image_exists_api_error_false(self):
        # docker.errors.APIError is also reported as "not present"
        fake_client = MagicMock()
        fake_client.images.get.side_effect = derr.APIError("boom", None)
        self.assertFalse(local_image_exists("broken:tag", client=fake_client))

    def test_local_image_exists_uses_lazy_singleton(self):
        # Patch docker.from_env used by _get_client()
        with mock.patch(
            "cli.lib.common.docker_helper.docker.from_env"
        ) as mock_from_env:
            fake_docker_client = MagicMock()
            mock_from_env.return_value = fake_docker_client

            # First call should create and cache the client
            first = _get_client()
            self.assertIs(first, fake_docker_client)
            mock_from_env.assert_called_once()

            # Second call should reuse the cached client (no extra from_env calls)
            second = _get_client()
            self.assertIs(second, fake_docker_client)
            mock_from_env.assert_called_once()  # still once

    def test_local_image_exists_without_client_param_calls_get_client_once(self):
        # With no client argument, local_image_exists obtains one via _get_client
        with mock.patch("cli.lib.common.docker_helper._get_client") as mock_get_client:
            fake_client = MagicMock()
            mock_get_client.return_value = fake_client

            tags = ("repo:tag", "repo:tag2")
            for tag in tags:
                local_image_exists(tag)

            # _get_client is invoked on every call (the caching lives inside
            # _get_client itself), and each tag is looked up once.
            self.assertEqual(mock_get_client.call_count, 2)
            self.assertEqual(fake_client.images.get.call_count, 2)
            for tag in tags:
                fake_client.images.get.assert_any_call(tag)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@ -1,149 +0,0 @@
|
|||||||
import os
|
|
||||||
import unittest
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import patch
|
|
||||||
|
|
||||||
import cli.lib.common.envs_helper as m
|
|
||||||
|
|
||||||
|
|
||||||
class TestEnvHelpers(unittest.TestCase):
    """Tests for the environment-variable helpers in ``envs_helper``."""

    def setUp(self):
        # Snapshot the environment so tearDown can restore it
        self._env_backup = dict(os.environ)

    def tearDown(self):
        # Put the environment back exactly as it was
        os.environ.clear()
        os.environ.update(self._env_backup)

    # -------- get_env --------
    def test_get_env_unset_returns_default(self):
        with patch.dict(os.environ, {}, clear=True):
            value = m.get_env("FOO", "default")
        self.assertEqual(value, "default")

    def test_get_env_empty_returns_default(self):
        with patch.dict(os.environ, {"FOO": ""}, clear=True):
            value = m.get_env("FOO", "default")
        self.assertEqual(value, "default")

    def test_get_env_set_returns_value(self):
        with patch.dict(os.environ, {"FOO": "bar"}, clear=True):
            value = m.get_env("FOO", "default")
        self.assertEqual(value, "bar")

    def test_get_env_not_exist_returns_default(self):
        with patch.dict(os.environ, {"FOO": "bar"}, clear=True):
            value = m.get_env("TEST_NOT_EXIST", "default")
        self.assertEqual(value, "default")

    def test_get_env_not_exist_without_default(self):
        with patch.dict(os.environ, {"FOO": "bar"}, clear=True):
            value = m.get_env("TEST_NOT_EXIST")
        self.assertEqual(value, "")

    # -------- env_bool --------
    def test_env_bool_uses_default_when_unset(self):
        with patch.dict(os.environ, {}, clear=True):
            self.assertTrue(m.env_bool("FLAG", default=True))
            self.assertFalse(m.env_bool("FLAG", default=False))

    def test_env_bool_uses_str2bool_when_set(self):
        # Replace str2bool as seen by env_bool so the test does not depend
        # on the real implementation's exact behaviour
        truthy = {"1", "true", "yes", "on", "y"}

        def stub_str2bool(s: str) -> bool:
            return s.lower() in truthy

        with (
            patch.dict(os.environ, {"FLAG": "yEs"}, clear=True),
            patch.object(m, "str2bool", stub_str2bool),
        ):
            self.assertTrue(m.env_bool("FLAG", default=False))

    # -------- env_path_optional / env_path --------
    def test_env_path_optional_unset_returns_none_by_default(self):
        with patch.dict(os.environ, {}, clear=True):
            self.assertIsNone(m.env_path_optional("P"))

    def test_env_path_optional_unset_returns_none_when_env_var_is_empty(self):
        with patch.dict(os.environ, {"P": ""}, clear=True):
            self.assertIsNone(m.env_path_optional("P"))

    def test_env_path_optional_unset_returns_default_str(self):
        # default given as a string; resolve=True by default -> absolute path
        default_str = "x/y"
        with patch.dict(os.environ, {}, clear=True):
            path = m.env_path_optional("P", default=default_str)
            self.assertIsInstance(path, Path)
            self.assertIsNotNone(path)
            if path:
                self.assertTrue(path.is_absolute())
                self.assertEqual(path.parts[-2:], ("x", "y"))

    def test_env_path_optional_unset_returns_default_path_no_resolve(self):
        default_path = Path("z")
        with patch.dict(os.environ, {}, clear=True):
            path = m.env_path_optional("P", default=default_path, resolve=False)
            self.assertEqual(path, default_path)

    def test_env_path_optional_respects_resolve_true(self):
        with patch.dict(os.environ, {"P": "a/b"}, clear=True):
            path = m.env_path_optional("P", resolve=True)
            self.assertIsInstance(path, Path)
            if path:
                self.assertTrue(path.is_absolute())

    def test_env_path_optional_respects_resolve_false(self):
        with patch.dict(os.environ, {"P": "rel/dir"}, clear=True):
            path = m.env_path_optional("P", resolve=False)
            self.assertEqual(path, Path("rel/dir"))
            if path:
                self.assertFalse(path.is_absolute())

    def test_env_path_raises_when_missing_and_default_none(self):
        with patch.dict(os.environ, {}, clear=True):
            with self.assertRaises(ValueError):
                m.env_path("P", None, resolve=True)

    def test_env_path_returns_path_when_present(self):
        expected = Path("./b").resolve()
        with patch.dict(os.environ, {"P": str(expected)}, clear=True):
            path = m.env_path("P", None, resolve=True)
            self.assertEqual(path, expected)

    # -------- dataclass field helpers --------
    def test_dataclass_fields_read_env_at_instantiation(self):
        @dataclass
        class Cfg:
            flag: bool = m.env_bool_field("FLAG", default=False)
            out: Path = m.env_path_field("OUT", default="ab", resolve=True)
            name: str = m.env_str_field("NAME", default="anon")

        # First instantiation picks up the values set in the environment
        first_env = {"FLAG": "true", "OUT": "outdir", "NAME": "alice"}
        with patch.dict(os.environ, first_env, clear=True):
            cfg1 = Cfg()
            self.assertTrue(cfg1.flag)
            self.assertIsInstance(cfg1.out, Path)
            self.assertTrue(cfg1.out.is_absolute())
            self.assertEqual(cfg1.name, "alice")
            cfg1.name = "bob"  # change instance value
            self.assertEqual(cfg1.name, "bob")  # change is reflected

        # Change env; a new instance should reflect the new values
        second_env = {"FLAG": "false", "NAME": ""}
        with patch.dict(os.environ, second_env, clear=True):
            cfg2 = Cfg()
            self.assertFalse(cfg2.flag)  # str2bool("false") -> False
            self.assertIn("ab", str(cfg2.out))
            self.assertIsInstance(cfg2.out, Path)
            self.assertTrue(cfg2.out.is_absolute())
            self.assertEqual(cfg2.name, "anon")  # empty -> fallback to default

    def test_dataclass_path_field_with_default_value(self):
        @dataclass
        class C2:
            out: Path = m.env_path_field("OUT", default="some/dir", resolve=False)

        with patch.dict(os.environ, {}, clear=True):
            cfg = C2()
            self.assertEqual(cfg.out, Path("some/dir"))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@ -1,122 +0,0 @@
|
|||||||
# test_path_utils.py
|
|
||||||
# Run: pytest -q
|
|
||||||
|
|
||||||
import os
|
|
||||||
import unittest
|
|
||||||
from pathlib import Path
|
|
||||||
from tempfile import TemporaryDirectory
|
|
||||||
|
|
||||||
from cli.lib.common.path_helper import (
|
|
||||||
copy,
|
|
||||||
ensure_dir_exists,
|
|
||||||
force_create_dir,
|
|
||||||
get_path,
|
|
||||||
is_path_exist,
|
|
||||||
remove_dir,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
class TestPathHelper(unittest.TestCase):
    """Tests for the helpers imported from ``cli.lib.common.path_helper``.

    Each test gets a private temporary directory (``self.tmp_path``) that is
    created in ``setUp`` and deleted in ``tearDown``.
    """

    def setUp(self):
        # Remember the starting cwd: one test chdirs into the temp dir, and
        # leaving the process inside a directory that tearDown deletes would
        # break any later code that calls os.getcwd().
        self._orig_cwd = os.getcwd()
        self.tmpdir = TemporaryDirectory()
        self.tmp_path = Path(self.tmpdir.name)

    def tearDown(self):
        # Step out of the temp dir before removing it.
        os.chdir(self._orig_cwd)
        self.tmpdir.cleanup()

    # -------- get_path --------
    def test_get_path_returns_path_for_str(self):
        """A relative string with resolve=False comes back as a relative Path."""
        # Use relative path to avoid absolute-ness
        rel_str = "sub/f.txt"
        os.chdir(self.tmp_path)
        p = get_path(rel_str, resolve=False)
        self.assertIsInstance(p, Path)
        self.assertFalse(p.is_absolute())
        self.assertEqual(str(p), rel_str)

    def test_get_path_resolves(self):
        """resolve=True yields an absolute path that keeps the trailing parts."""
        rel_str = "sub/f.txt"
        p = get_path(str(self.tmp_path / rel_str), resolve=True)
        self.assertTrue(p.is_absolute())
        self.assertTrue(str(p).endswith(rel_str))

    def test_get_path_with_path_input(self):
        """A Path input with resolve=False round-trips to the same string."""
        p_in = self.tmp_path / "sub/f.txt"
        p_out = get_path(p_in, resolve=False)
        # assertEqual reports both values on failure, unlike assertTrue(a == b).
        self.assertEqual(str(p_out), str(p_in))

    def test_get_path_with_none_raises(self):
        """None is rejected with ValueError."""
        with self.assertRaises(ValueError):
            get_path(None)  # type: ignore[arg-type]

    def test_get_path_invalid_type_raises(self):
        """A non-path, non-string argument is rejected with TypeError."""
        with self.assertRaises(TypeError):
            get_path(123)  # type: ignore[arg-type]

    # -------- ensure_dir_exists / force_create_dir / remove_dir --------
    def test_ensure_dir_exists_creates_and_is_idempotent(self):
        """The directory is created, and a second call does not raise."""
        d = self.tmp_path / "made"
        ensure_dir_exists(d)
        # is_dir() is False for a missing path, so it also covers exists().
        self.assertTrue(d.is_dir())
        ensure_dir_exists(d)

    def test_force_create_dir_clears_existing(self):
        """An existing, populated directory ends up present but empty."""
        d = self.tmp_path / "fresh"
        (d / "inner").mkdir(parents=True)
        (d / "inner" / "f.txt").write_text("x")
        force_create_dir(d)
        self.assertTrue(d.exists())
        self.assertEqual(list(d.iterdir()), [])

    def test_remove_dir_none_is_noop(self):
        """Passing None must not raise."""
        remove_dir(None)  # type: ignore[arg-type]

    def test_remove_dir_nonexistent_is_noop(self):
        """Removing a path that does not exist must not raise."""
        ghost = self.tmp_path / "ghost"
        remove_dir(ghost)

    def test_remove_dir_accepts_str(self):
        """A string path is accepted and the directory is removed."""
        d = self.tmp_path / "to_rm"
        d.mkdir()
        remove_dir(str(d))
        self.assertFalse(d.exists())

    # -------- copy --------
    def test_copy_file_to_file(self):
        """Copying a file to a destination whose parent does not exist yet."""
        src = self.tmp_path / "src.txt"
        dst = self.tmp_path / "out" / "dst.txt"
        src.write_text("hello")
        copy(src, dst)
        self.assertEqual(dst.read_text(), "hello")

    def test_copy_dir_to_new_dir(self):
        """Copying a directory tree to a destination that does not exist."""
        src = self.tmp_path / "srcdir"
        (src / "a").mkdir(parents=True)
        (src / "a" / "f.txt").write_text("content")
        dst = self.tmp_path / "destdir"
        copy(src, dst)
        self.assertEqual((dst / "a" / "f.txt").read_text(), "content")

    def test_copy_dir_into_existing_dir_overwrite_true_merges(self):
        """Copying into an existing directory keeps its files and adds new ones."""
        src = self.tmp_path / "srcdir"
        dst = self.tmp_path / "destdir"
        (src / "x").mkdir(parents=True)
        (src / "x" / "new.txt").write_text("new")
        dst.mkdir()
        (dst / "existing.txt").write_text("old")
        copy(src, dst)
        self.assertEqual((dst / "existing.txt").read_text(), "old")
        self.assertEqual((dst / "x" / "new.txt").read_text(), "new")

    def test_is_str_path_exist(self):
        """is_path_exist handles str and Path inputs, and the empty string."""
        p = self.tmp_path / "x.txt"
        p.write_text("1")
        self.assertTrue(is_path_exist(str(p)))
        self.assertTrue(is_path_exist(p))
        self.assertFalse(is_path_exist(str(self.tmp_path / "missing")))
        self.assertFalse(is_path_exist(self.tmp_path / "missing"))
        self.assertFalse(is_path_exist(""))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main()
|
|
||||||
@ -1,185 +0,0 @@
|
|||||||
# tests/test_run_test_plan.py
|
|
||||||
import importlib
|
|
||||||
from contextlib import nullcontext
|
|
||||||
from types import SimpleNamespace
|
|
||||||
from unittest.mock import MagicMock
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
|
|
||||||
|
|
||||||
MOD = "cli.lib.core.vllm.lib"
|
|
||||||
|
|
||||||
# We import inside tests so the MOD override above applies everywhere
|
|
||||||
run_test_plan_import_path = f"{MOD}.run_test_plan"
|
|
||||||
|
|
||||||
|
|
||||||
def _get_cmd(c):
    """Return the command recorded in mock call ``c``.

    The ``cmd`` keyword argument wins when present; otherwise the first
    positional argument is used, and ``None`` when there is neither.
    """
    # Support both kwargs and positional args
    return c.kwargs.get("cmd", c.args[0] if c.args else None)
|
|
||||||
|
|
||||||
|
|
||||||
def _get_check(c):
    """Return the ``check`` flag recorded in mock call ``c``.

    The ``check`` keyword argument wins when present; otherwise the second
    positional argument is used, and ``False`` when there is none.
    """
    if "check" in c.kwargs:
        return c.kwargs["check"]
    # If positional, assume second arg is 'check' when present; default False
    return c.args[1] if len(c.args) > 1 else False
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
def patch_module(monkeypatch):
    """
    Patch helpers ('pip_install_packages', 'temp_environ', 'working_directory',
    'run_command', 'logger') inside the target module and expose them.

    Returns a SimpleNamespace carrying the imported module, its
    ``run_test_plan`` attribute, the mocks, and the lists that record the
    arguments passed to the fake context managers.
    """
    module = importlib.import_module(MOD)

    # Create fakes/mocks
    pip_install_packages = MagicMock(name="pip_install_packages")
    run_command = MagicMock(name="run_command", return_value=0)

    # temp_environ / working_directory: record calls but act as context managers
    temp_calls: list[dict] = []
    workdir_calls: list[str] = []

    def fake_working_directory(path: str):
        workdir_calls.append(path)
        return nullcontext()

    def fake_temp_env(map: dict[str, str]):
        temp_calls.append(map)
        return nullcontext()

    logger = SimpleNamespace(
        info=MagicMock(name="logger.info"),
        error=MagicMock(name="logger.error"),
    )

    # Apply patches (raise if attribute doesn't exist)
    monkeypatch.setattr(
        module, "pip_install_packages", pip_install_packages, raising=True
    )
    monkeypatch.setattr(module, "run_command", run_command, raising=True)
    monkeypatch.setattr(
        module, "working_directory", fake_working_directory, raising=True
    )
    monkeypatch.setattr(module, "temp_environ", fake_temp_env, raising=True)
    monkeypatch.setattr(module, "logger", logger, raising=True)

    return SimpleNamespace(
        module=module,
        run_test_plan=module.run_test_plan,  # expose to avoid getattr("constant") (Ruff B009)
        pip_install_packages=pip_install_packages,
        run_command=run_command,
        temp_calls=temp_calls,
        workdir_calls=workdir_calls,
        logger=logger,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_module):
    """Every step is run in order, with the plan's env vars and working directory."""
    run_test_plan = patch_module.run_test_plan

    tests_map = {
        "basic": {
            "title": "Basic suite",
            "package_install": [],
            "working_directory": "tests",
            "env_vars": {"GLOBAL_FLAG": "1"},
            "steps": [
                "export A=x && pytest -q",
                "export B=y && pytest -q tests/unit",
            ],
        }
    }

    # One exit code per step: the plan has exactly two steps, each passed to
    # run_command as a single command string.
    patch_module.run_command.side_effect = [0, 0]

    run_test_plan("basic", "cpu", tests_map)

    calls = patch_module.run_command.call_args_list
    cmds = [_get_cmd(c) for c in calls]
    checks = [_get_check(c) for c in calls]

    assert cmds == [
        "export A=x && pytest -q",
        "export B=y && pytest -q tests/unit",
    ]
    assert all(chk is False for chk in checks)

    assert patch_module.workdir_calls == ["tests"]
    assert patch_module.temp_calls == [{"GLOBAL_FLAG": "1"}]
|
|
||||||
|
|
||||||
|
|
||||||
def test_installs_packages_when_present(monkeypatch, patch_module):
    """A plan with ``package_install`` triggers exactly one pip_install_packages call."""
    run_test_plan = patch_module.module.run_test_plan

    tests_map = {
        "with_pkgs": {
            "title": "Needs deps",
            "package_install": ["timm==1.0.0", "flash-attn"],
            "steps": ["pytest -q"],
        }
    }

    patch_module.run_command.return_value = 0

    run_test_plan("with_pkgs", "gpu", tests_map)

    patch_module.pip_install_packages.assert_called_once_with(
        packages=["timm==1.0.0", "flash-attn"],
        prefer_uv=True,
    )
|
|
||||||
|
|
||||||
|
|
||||||
def test_raises_on_missing_plan(patch_module):
    """Asking for a plan name absent from ``tests_map`` raises RuntimeError."""
    run_test_plan = patch_module.module.run_test_plan
    with pytest.raises(RuntimeError) as ei:
        run_test_plan("nope", "cpu", tests_map={})

    # Checked outside the `with` block: statements after the raising call
    # inside it would never execute.
    assert "test nope not found" in str(ei.value)
|
|
||||||
|
|
||||||
|
|
||||||
def test_aggregates_failures_and_raises(monkeypatch, patch_module):
    """All steps are attempted; non-zero exit codes are counted and reported once."""
    run_test_plan = patch_module.module.run_test_plan

    tests_map = {
        "mix": {
            "title": "Some pass some fail",
            "steps": [
                "pytest test_a.py",  # 0 → pass
                "pytest test_b.py",  # 1 → fail
                "pytest test_c.py",  # 2 → fail
            ],
        }
    }

    # Simulate pass, fail, fail
    patch_module.run_command.side_effect = [0, 1, 2]

    with pytest.raises(RuntimeError) as ei:
        run_test_plan("mix", "cpu", tests_map)

    msg = str(ei.value)
    assert "2 pytest runs failed" in msg
    # Ensure logger captured failed tests list
    patch_module.logger.error.assert_called_once()
    # And we attempted all three commands
    assert patch_module.run_command.call_count == 3
|
|
||||||
|
|
||||||
|
|
||||||
def test_custom_working_directory_used(patch_module):
    """The plan's ``working_directory`` value is what working_directory() receives."""
    run_test_plan = patch_module.module.run_test_plan

    tests_map = {
        "customwd": {
            "title": "Custom wd",
            "working_directory": "examples/ci",
            "steps": ["pytest -q"],
        }
    }

    patch_module.run_command.return_value = 0
    run_test_plan("customwd", "cpu", tests_map)

    assert patch_module.workdir_calls == ["examples/ci"]
|
|
||||||
@ -1,143 +0,0 @@
|
|||||||
import os
|
|
||||||
import tempfile
|
|
||||||
import unittest
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
from cli.lib.common.utils import temp_environ, working_directory # <-- replace import
|
|
||||||
|
|
||||||
|
|
||||||
class EnvIsolatedTestCase(unittest.TestCase):
    """Base class that snapshots os.environ and CWD for isolation.

    ``setUp`` records the environment and working directory, then moves the
    process into a fresh temporary directory. ``tearDown`` moves back,
    deletes that directory and restores the environment snapshot.
    """

    def setUp(self):
        self._env_backup = dict(os.environ)

        # Snapshot/repair CWD if it's gone
        try:
            self._cwd_backup = os.getcwd()
        except FileNotFoundError:
            # If CWD no longer exists, switch to a safe place and record that
            self._cwd_backup = tempfile.gettempdir()
            os.chdir(self._cwd_backup)

        # Create a temporary directory for the test to run in
        self._temp_dir = tempfile.mkdtemp()
        os.chdir(self._temp_dir)

    def tearDown(self):
        # Imported here because only this method needs it.
        import shutil

        # Restore cwd first (before cleaning up temp dir)
        try:
            os.chdir(self._cwd_backup)
        except OSError:
            os.chdir(tempfile.gettempdir())

        # Best-effort cleanup: ignore_errors=True already suppresses removal
        # failures, so no surrounding try/except is needed.
        shutil.rmtree(self._temp_dir, ignore_errors=True)

        # Restore env: drop variables added since setUp, then put back every
        # snapshotted value (covers both modified and deleted variables).
        for key in set(os.environ) - set(self._env_backup):
            os.environ.pop(key, None)
        os.environ.update(self._env_backup)
|
|
||||||
|
|
||||||
|
|
||||||
class TestTempEnviron(EnvIsolatedTestCase):
    """Tests for the ``temp_environ`` context manager."""

    def test_sets_and_restores_new_var(self):
        """A variable that did not exist is set inside the block and removed after."""
        var = "TEST_TMP_ENV_NEW"
        self.assertNotIn(var, os.environ)

        with temp_environ({var: "123"}):
            self.assertEqual(os.environ[var], "123")

        self.assertNotIn(var, os.environ)  # removed after exit

    def test_overwrites_and_restores_existing_var(self):
        """An existing variable is overridden inside the block and restored after."""
        var = "TEST_TMP_ENV_OVERWRITE"
        os.environ[var] = "orig"

        with temp_environ({var: "override"}):
            self.assertEqual(os.environ[var], "override")

        self.assertEqual(os.environ[var], "orig")  # restored

    def test_multiple_vars_and_missing_cleanup(self):
        """With several variables, new ones are removed and old ones restored."""
        v1, v2 = "TEST_ENV_V1", "TEST_ENV_V2"
        os.environ.pop(v1, None)
        os.environ[v2] = "keep"

        with temp_environ({v1: "a", v2: "b"}):
            self.assertEqual(os.environ[v1], "a")
            self.assertEqual(os.environ[v2], "b")

        self.assertNotIn(v1, os.environ)  # newly-added -> removed
        self.assertEqual(os.environ[v2], "keep")  # pre-existing -> restored

    def test_restores_even_on_exception(self):
        """The environment is restored when the block raises."""
        var = "TEST_TMP_ENV_EXCEPTION"
        self.assertNotIn(var, os.environ)

        with self.assertRaises(RuntimeError):
            with temp_environ({var: "x"}):
                self.assertEqual(os.environ[var], "x")
                raise RuntimeError("boom")

        self.assertNotIn(var, os.environ)  # removed after exception
|
|
||||||
|
|
||||||
|
|
||||||
class TestWorkingDirectory(EnvIsolatedTestCase):
    """Tests for the ``working_directory`` context manager."""

    def test_changes_and_restores(self):
        """The cwd is the target inside the block and the original after it."""
        start = Path.cwd()
        with tempfile.TemporaryDirectory() as td:
            target = Path(td) / "wd"
            target.mkdir()

            with working_directory(str(target)):
                self.assertEqual(Path.cwd().resolve(), target.resolve())

        self.assertEqual(Path.cwd(), start)

    def test_noop_when_empty_path(self):
        """An empty path leaves the cwd unchanged, inside and after the block."""
        start = Path.cwd()
        with working_directory(""):
            self.assertEqual(Path.cwd(), start)
        self.assertEqual(Path.cwd(), start)

    def test_restores_on_exception(self):
        """The original cwd is restored when the block raises."""
        start = Path.cwd()

        with tempfile.TemporaryDirectory() as td:
            target = Path(td) / "wd_exc"
            target.mkdir()

            with self.assertRaises(ValueError):
                with working_directory(str(target)):
                    # Normalize both sides to handle /var -> /private/var
                    self.assertEqual(Path.cwd().resolve(), target.resolve())
                    raise ValueError("boom")

        self.assertEqual(Path.cwd().resolve(), start.resolve())

    def test_raises_for_missing_dir(self):
        """A missing target raises FileNotFoundError and leaves the cwd alone."""
        start = Path.cwd()
        with tempfile.TemporaryDirectory() as td:
            missing = Path(td) / "does_not_exist"
            with self.assertRaises(FileNotFoundError):
                # os.chdir should raise before yielding
                with working_directory(str(missing)):
                    pass
        self.assertEqual(Path.cwd(), start)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
unittest.main(verbosity=2)
|
|
||||||
@ -1,176 +0,0 @@
|
|||||||
import os
|
|
||||||
import tempfile
|
|
||||||
import unittest
|
|
||||||
from pathlib import Path
|
|
||||||
from unittest.mock import MagicMock, patch
|
|
||||||
|
|
||||||
import cli.lib.core.vllm.vllm_build as vllm_build
|
|
||||||
|
|
||||||
|
|
||||||
_VLLM_BUILD_MODULE = "cli.lib.core.vllm.vllm_build"
|
|
||||||
|
|
||||||
|
|
||||||
class TestVllmBuildParameters(unittest.TestCase):
    """Validation tests for ``vllm_build.VllmBuildParameters``."""

    def _enter_temp_cwd(self):
        """Chdir into a fresh temporary directory for the rest of the test.

        Cleanups run in LIFO order, so the original working directory is
        restored first and only then is the temporary directory removed.
        This avoids leaving the process inside a deleted directory.
        """
        tmp = tempfile.TemporaryDirectory()
        self.addCleanup(tmp.cleanup)
        self.addCleanup(os.chdir, os.getcwd())
        os.chdir(tmp.name)

    # Stacked patch decorators inject mocks bottom-up; patch.dict injects none.
    @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=True)
    @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=True)
    @patch(
        "cli.lib.common.envs_helper.env_path_optional",
        side_effect=lambda name, default=None, resolve=True: {
            "DOCKERFILE_PATH": Path("/abs/vllm/Dockerfile"),
            "TORCH_WHEELS_PATH": Path("/abs/dist"),
            "OUTPUT_DIR": Path("/abs/shared"),
        }.get(name, Path(default) if default is not None else None),
    )
    @patch.dict(
        os.environ,
        {
            "USE_TORCH_WHEEL": "1",
            "USE_LOCAL_BASE_IMAGE": "1",
            "USE_LOCAL_DOCKERFILE": "1",
            "BASE_IMAGE": "my/image:tag",
            "DOCKERFILE_PATH": "vllm/Dockerfile",
            "TORCH_WHEELS_PATH": "dist",
            "OUTPUT_DIR": "shared",
        },
        clear=True,
    )
    def test_params_success_normalizes_and_validates(
        self, mock_env_path, mock_is_path, mock_local_img
    ):
        """With all checks stubbed to pass, fields hold the stubbed absolute paths."""
        params = vllm_build.VllmBuildParameters()
        self.assertEqual(params.torch_whls_path, Path("/abs/dist"))
        self.assertEqual(params.dockerfile_path, Path("/abs/vllm/Dockerfile"))
        self.assertEqual(params.output_dir, Path("/abs/shared"))
        self.assertEqual(params.base_image, "my/image:tag")

    @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False)
    @patch.dict(
        os.environ, {"USE_TORCH_WHEEL": "1", "TORCH_WHEELS_PATH": "dist"}, clear=True
    )
    def test_params_missing_torch_whls_raises(self, _is_path):
        """A missing wheels path raises ValueError naming TORCH_WHEELS_PATH."""
        self._enter_temp_cwd()
        with self.assertRaises(ValueError) as cm:
            vllm_build.VllmBuildParameters(
                use_local_base_image=False,
                use_local_dockerfile=False,
            )
        err = cm.exception
        self.assertIn("TORCH_WHEELS_PATH", str(err))

    @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=False)
    @patch.dict(
        os.environ, {"USE_LOCAL_BASE_IMAGE": "1", "BASE_IMAGE": "img:tag"}, clear=True
    )
    def test_params_missing_local_base_image_raises(self, _local_img):
        """A missing local base image raises ValueError naming BASE_IMAGE."""
        self._enter_temp_cwd()
        with self.assertRaises(ValueError) as cm:
            vllm_build.VllmBuildParameters(
                use_torch_whl=False,
                use_local_dockerfile=False,
            )
        err = cm.exception
        self.assertIn("BASE_IMAGE", str(err))

    @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False)
    @patch.dict(
        os.environ,
        {"USE_LOCAL_DOCKERFILE": "1", "DOCKERFILE_PATH": "Dockerfile"},
        clear=True,
    )
    def test_params_missing_dockerfile_raises(self, _is_path):
        """A missing Dockerfile raises ValueError naming DOCKERFILE_PATH."""
        self._enter_temp_cwd()
        with self.assertRaises(ValueError) as cm:
            vllm_build.VllmBuildParameters(
                use_torch_whl=False,
                use_local_base_image=False,
            )
        err = cm.exception
        self.assertIn("DOCKERFILE_PATH", str(err))

    @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False)
    @patch.dict(
        os.environ,
        {"OUTPUT_DIR": ""},
        clear=True,
    )
    def test_params_missing_output_dir(self, _is_path):
        """An empty OUTPUT_DIR makes construction raise FileNotFoundError."""
        with self.assertRaises(FileNotFoundError):
            vllm_build.VllmBuildParameters()
|
|
||||||
|
|
||||||
|
|
||||||
class TestBuildCmdAndRun(unittest.TestCase):
    """Tests for ``VllmBuildRunner``: command generation and the ``run`` flow."""

    @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=True)
    def test_generate_docker_build_cmd_includes_bits(self, _exists):
        """The generated command contains every expected flag and build arg."""
        runner = vllm_build.VllmBuildRunner()
        inputs = MagicMock()
        inputs.output_dir = Path("/abs/out")
        inputs.use_local_base_image = True
        inputs.base_image = "img:tag"
        inputs.torch_whls_path = Path("./vllm/tmp")
        inputs.max_jobs = 64
        inputs.cuda_version = "12.8.1"
        inputs.python_version = "3.12"
        inputs.sccache_bucket = "my-bucket"
        inputs.sccache_region = "us-west-2"
        inputs.torch_cuda_arch_list = "8.0;9.0"
        inputs.target_stage = "export-wheels"
        inputs.tag_name = "vllm-wheels"

        cmd = runner._generate_docker_build_cmd(inputs)
        # Collapse all whitespace runs so the substring checks below do not
        # depend on how the command string is wrapped.
        squashed = " ".join(cmd.split())

        self.assertIn("--output type=local,dest=/abs/out", squashed)
        self.assertIn("-f docker/Dockerfile.nightly_torch", squashed)
        self.assertIn("--pull=false", squashed)
        self.assertIn("--build-arg TORCH_WHEELS_PATH=tmp", squashed)
        self.assertIn("--build-arg BUILD_BASE_IMAGE=img:tag", squashed)
        self.assertIn("--build-arg FINAL_BASE_IMAGE=img:tag", squashed)
        self.assertIn("--build-arg max_jobs=64", squashed)
        self.assertIn("--build-arg CUDA_VERSION=12.8.1", squashed)
        self.assertIn("--build-arg PYTHON_VERSION=3.12", squashed)
        self.assertIn("--build-arg USE_SCCACHE=1", squashed)
        self.assertIn("--build-arg SCCACHE_BUCKET_NAME=my-bucket", squashed)
        self.assertIn("--build-arg SCCACHE_REGION_NAME=us-west-2", squashed)
        self.assertIn("--build-arg torch_cuda_arch_list='8.0;9.0'", squashed)
        self.assertIn("--target export-wheels", squashed)
        self.assertIn("-t vllm-wheels", squashed)

    # Stacked patch decorators inject mocks bottom-up; patch.dict injects none.
    @patch(f"{_VLLM_BUILD_MODULE}.run_command")
    @patch(f"{_VLLM_BUILD_MODULE}.ensure_dir_exists")
    @patch(f"{_VLLM_BUILD_MODULE}.clone_vllm")
    @patch.object(
        vllm_build.VllmBuildRunner,
        "_generate_docker_build_cmd",
        return_value="docker buildx ...",
    )
    @patch.dict(
        os.environ,
        {
            "USE_TORCH_WHEEL": "0",
            "USE_LOCAL_BASE_IMAGE": "0",
            "USE_LOCAL_DOCKERFILE": "0",
            "OUTPUT_DIR": "shared",
        },
        clear=True,
    )
    def test_run_calls_clone_prepare_and_build(
        self, mock_gen, mock_clone, mock_ensure, mock_run
    ):
        """``run`` clones, ensures the output dir, builds the command and runs it once."""
        params = MagicMock()
        params.output_dir = Path("shared")
        params.use_local_dockerfile = False
        params.use_torch_whl = False

        with patch(f"{_VLLM_BUILD_MODULE}.VllmBuildParameters", return_value=params):
            runner = vllm_build.VllmBuildRunner()
            runner.run()

        mock_clone.assert_called_once()
        mock_ensure.assert_called_once_with(Path("shared"))
        mock_gen.assert_called_once_with(params)
        mock_run.assert_called_once()
        _, kwargs = mock_run.call_args
        # assertEqual instead of a bare assert: it is not stripped under
        # `python -O` and reports both values on failure.
        self.assertEqual(kwargs.get("cwd"), "vllm")
|
|
||||||
@ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \
|
|||||||
magma/build_magma.sh
|
magma/build_magma.sh
|
||||||
|
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: magma-cuda130
|
|
||||||
all: magma-cuda129
|
all: magma-cuda129
|
||||||
all: magma-cuda128
|
all: magma-cuda128
|
||||||
all: magma-cuda126
|
all: magma-cuda126
|
||||||
@ -26,12 +25,6 @@ clean:
|
|||||||
$(RM) -r magma-*
|
$(RM) -r magma-*
|
||||||
$(RM) -r output
|
$(RM) -r output
|
||||||
|
|
||||||
.PHONY: magma-cuda130
|
|
||||||
magma-cuda130: DESIRED_CUDA := 13.0
|
|
||||||
magma-cuda130: CUDA_ARCH_LIST := -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120
|
|
||||||
magma-cuda130:
|
|
||||||
$(DOCKER_RUN)
|
|
||||||
|
|
||||||
.PHONY: magma-cuda129
|
.PHONY: magma-cuda129
|
||||||
magma-cuda129: DESIRED_CUDA := 12.9
|
magma-cuda129: DESIRED_CUDA := 12.9
|
||||||
magma-cuda129: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120
|
magma-cuda129: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120
|
||||||
|
|||||||
@ -28,7 +28,6 @@ pushd ${PACKAGE_DIR}/magma-${MAGMA_VERSION}
|
|||||||
patch < ${PACKAGE_FILES}/CMake.patch
|
patch < ${PACKAGE_FILES}/CMake.patch
|
||||||
patch < ${PACKAGE_FILES}/cmakelists.patch
|
patch < ${PACKAGE_FILES}/cmakelists.patch
|
||||||
patch -p0 < ${PACKAGE_FILES}/thread_queue.patch
|
patch -p0 < ${PACKAGE_FILES}/thread_queue.patch
|
||||||
patch -p1 < ${PACKAGE_FILES}/cuda13.patch
|
|
||||||
patch -p1 < ${PACKAGE_FILES}/getrf_shfl.patch
|
patch -p1 < ${PACKAGE_FILES}/getrf_shfl.patch
|
||||||
patch -p1 < ${PACKAGE_FILES}/getrf_nbparam.patch
|
patch -p1 < ${PACKAGE_FILES}/getrf_nbparam.patch
|
||||||
# The build.sh script expects to be executed from the sources root folder
|
# The build.sh script expects to be executed from the sources root folder
|
||||||
@ -38,7 +37,6 @@ popd
|
|||||||
# Package recipe, license and tarball
|
# Package recipe, license and tarball
|
||||||
# Folder and package name are backward compatible for the build workflow
|
# Folder and package name are backward compatible for the build workflow
|
||||||
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
|
cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh
|
||||||
cp ${PACKAGE_FILES}/cuda13.patch ${PACKAGE_RECIPE}/cuda13.patch
|
|
||||||
cp ${PACKAGE_FILES}/thread_queue.patch ${PACKAGE_RECIPE}/thread_queue.patch
|
cp ${PACKAGE_FILES}/thread_queue.patch ${PACKAGE_RECIPE}/thread_queue.patch
|
||||||
cp ${PACKAGE_FILES}/cmakelists.patch ${PACKAGE_RECIPE}/cmakelists.patch
|
cp ${PACKAGE_FILES}/cmakelists.patch ${PACKAGE_RECIPE}/cmakelists.patch
|
||||||
cp ${PACKAGE_FILES}/getrf_shfl.patch ${PACKAGE_RECIPE}/getrf_shfl.patch
|
cp ${PACKAGE_FILES}/getrf_shfl.patch ${PACKAGE_RECIPE}/getrf_shfl.patch
|
||||||
|
|||||||
@ -1,26 +0,0 @@
|
|||||||
diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp
|
|
||||||
index 73fed1b20..e77519bfe 100644
|
|
||||||
--- a/interface_cuda/interface.cpp
|
|
||||||
+++ b/interface_cuda/interface.cpp
|
|
||||||
@@ -438,14 +438,20 @@ magma_print_environment()
|
|
||||||
cudaDeviceProp prop;
|
|
||||||
err = cudaGetDeviceProperties( &prop, dev );
|
|
||||||
check_error( err );
|
|
||||||
+ #ifdef MAGMA_HAVE_CUDA
|
|
||||||
+#if CUDA_VERSION < 13000
|
|
||||||
printf( "%% device %d: %s, %.1f MHz clock, %.1f MiB memory, capability %d.%d\n",
|
|
||||||
dev,
|
|
||||||
prop.name,
|
|
||||||
prop.clockRate / 1000.,
|
|
||||||
+#else
|
|
||||||
+ printf( "%% device %d: %s, ??? MHz clock, %.1f MiB memory, capability %d.%d\n",
|
|
||||||
+ dev,
|
|
||||||
+ prop.name,
|
|
||||||
+#endif
|
|
||||||
prop.totalGlobalMem / (1024.*1024.),
|
|
||||||
prop.major,
|
|
||||||
prop.minor );
|
|
||||||
- #ifdef MAGMA_HAVE_CUDA
|
|
||||||
int arch = prop.major*100 + prop.minor*10;
|
|
||||||
if ( arch < MAGMA_CUDA_ARCH_MIN ) {
|
|
||||||
printf("\n"
|
|
||||||
@ -5,6 +5,10 @@ set -ex
|
|||||||
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
|
||||||
|
|
||||||
case "${GPU_ARCH_TYPE:-BLANK}" in
|
case "${GPU_ARCH_TYPE:-BLANK}" in
|
||||||
|
BLANK)
|
||||||
|
# Legacy behavior for CircleCI
|
||||||
|
bash "${SCRIPTPATH}/build_cuda.sh"
|
||||||
|
;;
|
||||||
cuda)
|
cuda)
|
||||||
bash "${SCRIPTPATH}/build_cuda.sh"
|
bash "${SCRIPTPATH}/build_cuda.sh"
|
||||||
;;
|
;;
|
||||||
|
|||||||
@ -138,11 +138,28 @@ fi
|
|||||||
|
|
||||||
echo "Calling setup.py bdist at $(date)"
|
echo "Calling setup.py bdist at $(date)"
|
||||||
|
|
||||||
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||||
|
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||||
|
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 \
|
||||||
|
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||||
|
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||||
|
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||||
|
echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||||
|
echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||||
|
time EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||||
|
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 \
|
||||||
|
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||||
|
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||||
|
CMAKE_FRESH=1 python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||||
|
echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||||
|
else
|
||||||
|
time CMAKE_ARGS=${CMAKE_ARGS[@]} \
|
||||||
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
EXTRA_CAFFE2_CMAKE_FLAGS=${EXTRA_CAFFE2_CMAKE_FLAGS[@]} \
|
||||||
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
BUILD_LIBTORCH_CPU_WITH_DEBUG=$BUILD_DEBUG_INFO \
|
||||||
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
USE_NCCL=${USE_NCCL} USE_RCCL=${USE_RCCL} USE_KINETO=${USE_KINETO} \
|
||||||
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
python setup.py bdist_wheel -d /tmp/$WHEELHOUSE_DIR
|
||||||
|
fi
|
||||||
echo "Finished setup.py bdist at $(date)"
|
echo "Finished setup.py bdist at $(date)"
|
||||||
|
|
||||||
# Build libtorch packages
|
# Build libtorch packages
|
||||||
@ -255,6 +272,10 @@ ls /tmp/$WHEELHOUSE_DIR
|
|||||||
mkdir -p "/$WHEELHOUSE_DIR"
|
mkdir -p "/$WHEELHOUSE_DIR"
|
||||||
mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/
|
mv /tmp/$WHEELHOUSE_DIR/torch*linux*.whl /$WHEELHOUSE_DIR/
|
||||||
|
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
mv /tmp/$WHEELHOUSE_DIR/torch_no_python*.whl /$WHEELHOUSE_DIR/ || true
|
||||||
|
fi
|
||||||
|
|
||||||
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
if [[ -n "$BUILD_PYTHONLESS" ]]; then
|
||||||
mkdir -p /$LIBTORCH_HOUSE_DIR
|
mkdir -p /$LIBTORCH_HOUSE_DIR
|
||||||
mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
|
mv /tmp/$LIBTORCH_HOUSE_DIR/*.zip /$LIBTORCH_HOUSE_DIR
|
||||||
@ -431,8 +452,16 @@ if [[ -z "$BUILD_PYTHONLESS" ]]; then
|
|||||||
pushd $PYTORCH_ROOT/test
|
pushd $PYTORCH_ROOT/test
|
||||||
|
|
||||||
# Install the wheel for this Python version
|
# Install the wheel for this Python version
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
pip uninstall -y "$TORCH_NO_PYTHON_PACKAGE_NAME" || true
|
||||||
|
fi
|
||||||
|
|
||||||
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
pip uninstall -y "$TORCH_PACKAGE_NAME"
|
||||||
|
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
pip install "$TORCH_NO_PYTHON_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||||
|
fi
|
||||||
|
|
||||||
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
pip install "$TORCH_PACKAGE_NAME" --no-index -f /$WHEELHOUSE_DIR --no-dependencies -v
|
||||||
|
|
||||||
# Print info on the libraries installed in this wheel
|
# Print info on the libraries installed in this wheel
|
||||||
|
|||||||
@ -66,9 +66,6 @@ case ${CUDA_VERSION} in
|
|||||||
TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
|
TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
|
||||||
fi
|
fi
|
||||||
;;
|
;;
|
||||||
13.0)
|
|
||||||
TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX"
|
|
||||||
;;
|
|
||||||
12.6)
|
12.6)
|
||||||
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
|
TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
|
||||||
;;
|
;;
|
||||||
@ -113,18 +110,13 @@ DEPS_SONAME=(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# CUDA_VERSION 12.*, 13.*
|
# CUDA_VERSION 12.6, 12.8, 12.9
|
||||||
if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
|
if [[ $CUDA_VERSION == 12* ]]; then
|
||||||
export USE_STATIC_CUDNN=0
|
export USE_STATIC_CUDNN=0
|
||||||
# Try parallelizing nvcc as well
|
# Try parallelizing nvcc as well
|
||||||
TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
|
export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
|
||||||
# Compress the fatbin with -compress-mode=size for CUDA 13
|
|
||||||
if [[ $CUDA_VERSION == 13* ]]; then
|
|
||||||
export TORCH_NVCC_FLAGS="$TORCH_NVCC_FLAGS -compress-mode=size"
|
|
||||||
fi
|
|
||||||
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
|
if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
|
||||||
echo "Bundling with cudnn and cublas."
|
echo "Bundling with cudnn and cublas."
|
||||||
|
|
||||||
DEPS_LIST+=(
|
DEPS_LIST+=(
|
||||||
"/usr/local/cuda/lib64/libcudnn_adv.so.9"
|
"/usr/local/cuda/lib64/libcudnn_adv.so.9"
|
||||||
"/usr/local/cuda/lib64/libcudnn_cnn.so.9"
|
"/usr/local/cuda/lib64/libcudnn_cnn.so.9"
|
||||||
@ -134,11 +126,15 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
|
|||||||
"/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
|
"/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9"
|
||||||
"/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
|
"/usr/local/cuda/lib64/libcudnn_heuristic.so.9"
|
||||||
"/usr/local/cuda/lib64/libcudnn.so.9"
|
"/usr/local/cuda/lib64/libcudnn.so.9"
|
||||||
|
"/usr/local/cuda/lib64/libcublas.so.12"
|
||||||
|
"/usr/local/cuda/lib64/libcublasLt.so.12"
|
||||||
"/usr/local/cuda/lib64/libcusparseLt.so.0"
|
"/usr/local/cuda/lib64/libcusparseLt.so.0"
|
||||||
|
"/usr/local/cuda/lib64/libcudart.so.12"
|
||||||
|
"/usr/local/cuda/lib64/libnvrtc.so.12"
|
||||||
"/usr/local/cuda/lib64/libnvrtc-builtins.so"
|
"/usr/local/cuda/lib64/libnvrtc-builtins.so"
|
||||||
"/usr/local/cuda/lib64/libcufile.so.0"
|
"/usr/local/cuda/lib64/libcufile.so.0"
|
||||||
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
|
"/usr/local/cuda/lib64/libcufile_rdma.so.1"
|
||||||
"/usr/local/cuda/lib64/libnvshmem_host.so.3"
|
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12"
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
|
"/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so"
|
||||||
)
|
)
|
||||||
DEPS_SONAME+=(
|
DEPS_SONAME+=(
|
||||||
@ -150,83 +146,41 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then
|
|||||||
"libcudnn_engines_precompiled.so.9"
|
"libcudnn_engines_precompiled.so.9"
|
||||||
"libcudnn_heuristic.so.9"
|
"libcudnn_heuristic.so.9"
|
||||||
"libcudnn.so.9"
|
"libcudnn.so.9"
|
||||||
|
"libcublas.so.12"
|
||||||
|
"libcublasLt.so.12"
|
||||||
"libcusparseLt.so.0"
|
"libcusparseLt.so.0"
|
||||||
|
"libcudart.so.12"
|
||||||
|
"libnvrtc.so.12"
|
||||||
"libnvrtc-builtins.so"
|
"libnvrtc-builtins.so"
|
||||||
"libnvshmem_host.so.3"
|
|
||||||
"libcufile.so.0"
|
"libcufile.so.0"
|
||||||
"libcufile_rdma.so.1"
|
"libcufile_rdma.so.1"
|
||||||
|
"libcupti.so.12"
|
||||||
"libnvperf_host.so"
|
"libnvperf_host.so"
|
||||||
)
|
)
|
||||||
# Add libnvToolsExt only if CUDA version is not 12.9
|
# Add libnvToolsExt only if CUDA version is not 12.9
|
||||||
if [[ $CUDA_VERSION == 13* ]]; then
|
if [[ $CUDA_VERSION != 12.9* ]]; then
|
||||||
DEPS_LIST+=(
|
DEPS_LIST+=("/usr/local/cuda/lib64/libnvToolsExt.so.1")
|
||||||
"/usr/local/cuda/lib64/libcublas.so.13"
|
DEPS_SONAME+=("libnvToolsExt.so.1")
|
||||||
"/usr/local/cuda/lib64/libcublasLt.so.13"
|
|
||||||
"/usr/local/cuda/lib64/libcudart.so.13"
|
|
||||||
"/usr/local/cuda/lib64/libnvrtc.so.13"
|
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13"
|
|
||||||
"/usr/local/cuda/lib64/libibverbs.so.1"
|
|
||||||
"/usr/local/cuda/lib64/librdmacm.so.1"
|
|
||||||
"/usr/local/cuda/lib64/libmlx5.so.1"
|
|
||||||
"/usr/local/cuda/lib64/libnl-3.so.200"
|
|
||||||
"/usr/local/cuda/lib64/libnl-route-3.so.200")
|
|
||||||
DEPS_SONAME+=(
|
|
||||||
"libcublas.so.13"
|
|
||||||
"libcublasLt.so.13"
|
|
||||||
"libcudart.so.13"
|
|
||||||
"libnvrtc.so.13"
|
|
||||||
"libcupti.so.13"
|
|
||||||
"libibverbs.so.1"
|
|
||||||
"librdmacm.so.1"
|
|
||||||
"libmlx5.so.1"
|
|
||||||
"libnl-3.so.200"
|
|
||||||
"libnl-route-3.so.200")
|
|
||||||
export USE_CUPTI_SO=1
|
|
||||||
export ATEN_STATIC_CUDA=0
|
|
||||||
export USE_CUDA_STATIC_LINK=0
|
|
||||||
export USE_CUFILE=0
|
|
||||||
else
|
|
||||||
DEPS_LIST+=(
|
|
||||||
"/usr/local/cuda/lib64/libnvToolsExt.so.1"
|
|
||||||
"/usr/local/cuda/lib64/libcublas.so.12"
|
|
||||||
"/usr/local/cuda/lib64/libcublasLt.so.12"
|
|
||||||
"/usr/local/cuda/lib64/libcudart.so.12"
|
|
||||||
"/usr/local/cuda/lib64/libnvrtc.so.12"
|
|
||||||
"/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12")
|
|
||||||
DEPS_SONAME+=(
|
|
||||||
"libnvToolsExt.so.1"
|
|
||||||
"libcublas.so.12"
|
|
||||||
"libcublasLt.so.12"
|
|
||||||
"libcudart.so.12"
|
|
||||||
"libnvrtc.so.12"
|
|
||||||
"libcupti.so.12")
|
|
||||||
fi
|
fi
|
||||||
else
|
else
|
||||||
echo "Using nvidia libs from pypi."
|
echo "Using nvidia libs from pypi."
|
||||||
CUDA_RPATHS=(
|
CUDA_RPATHS=(
|
||||||
'$ORIGIN/../../nvidia/cudnn/lib'
|
|
||||||
'$ORIGIN/../../nvidia/nvshmem/lib'
|
|
||||||
'$ORIGIN/../../nvidia/nccl/lib'
|
|
||||||
'$ORIGIN/../../nvidia/cusparselt/lib'
|
|
||||||
)
|
|
||||||
if [[ $CUDA_VERSION == 13* ]]; then
|
|
||||||
CUDA_RPATHS+=('$ORIGIN/../../nvidia/cu13/lib')
|
|
||||||
else
|
|
||||||
CUDA_RPATHS+=(
|
|
||||||
'$ORIGIN/../../nvidia/cublas/lib'
|
'$ORIGIN/../../nvidia/cublas/lib'
|
||||||
'$ORIGIN/../../nvidia/cuda_cupti/lib'
|
'$ORIGIN/../../nvidia/cuda_cupti/lib'
|
||||||
'$ORIGIN/../../nvidia/cuda_nvrtc/lib'
|
'$ORIGIN/../../nvidia/cuda_nvrtc/lib'
|
||||||
'$ORIGIN/../../nvidia/cuda_runtime/lib'
|
'$ORIGIN/../../nvidia/cuda_runtime/lib'
|
||||||
|
'$ORIGIN/../../nvidia/cudnn/lib'
|
||||||
'$ORIGIN/../../nvidia/cufft/lib'
|
'$ORIGIN/../../nvidia/cufft/lib'
|
||||||
'$ORIGIN/../../nvidia/curand/lib'
|
'$ORIGIN/../../nvidia/curand/lib'
|
||||||
'$ORIGIN/../../nvidia/cusolver/lib'
|
'$ORIGIN/../../nvidia/cusolver/lib'
|
||||||
'$ORIGIN/../../nvidia/cusparse/lib'
|
'$ORIGIN/../../nvidia/cusparse/lib'
|
||||||
|
'$ORIGIN/../../nvidia/cusparselt/lib'
|
||||||
'$ORIGIN/../../cusparselt/lib'
|
'$ORIGIN/../../cusparselt/lib'
|
||||||
|
'$ORIGIN/../../nvidia/nccl/lib'
|
||||||
|
'$ORIGIN/../../nvidia/nvshmem/lib'
|
||||||
'$ORIGIN/../../nvidia/nvtx/lib'
|
'$ORIGIN/../../nvidia/nvtx/lib'
|
||||||
'$ORIGIN/../../nvidia/cufile/lib'
|
'$ORIGIN/../../nvidia/cufile/lib'
|
||||||
)
|
)
|
||||||
fi
|
|
||||||
|
|
||||||
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
|
CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}")
|
||||||
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
|
export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib'
|
||||||
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
|
export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN'
|
||||||
|
|||||||
@ -25,7 +25,6 @@ source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
|||||||
export USE_STATIC_MKL=1
|
export USE_STATIC_MKL=1
|
||||||
export USE_ONEMKL=1
|
export USE_ONEMKL=1
|
||||||
export USE_XCCL=1
|
export USE_XCCL=1
|
||||||
export USE_MPI=0
|
|
||||||
|
|
||||||
WHEELHOUSE_DIR="wheelhousexpu"
|
WHEELHOUSE_DIR="wheelhousexpu"
|
||||||
LIBTORCH_HOUSE_DIR="libtorch_housexpu"
|
LIBTORCH_HOUSE_DIR="libtorch_housexpu"
|
||||||
|
|||||||
@ -50,6 +50,9 @@ if [[ ${BUILD_ENVIRONMENT} == *"parallelnative"* ]]; then
|
|||||||
export ATEN_THREADING=NATIVE
|
export ATEN_THREADING=NATIVE
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Enable LLVM dependency for TensorExpr testing
|
||||||
|
export USE_LLVM=/opt/llvm
|
||||||
|
export LLVM_DIR=/opt/llvm/lib/cmake/llvm
|
||||||
|
|
||||||
if ! which conda; then
|
if ! which conda; then
|
||||||
# In ROCm CIs, we are doing cross compilation on build machines with
|
# In ROCm CIs, we are doing cross compilation on build machines with
|
||||||
@ -92,27 +95,6 @@ if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then
|
|||||||
export ACL_ROOT_DIR=/ComputeLibrary
|
export ACL_ROOT_DIR=/ComputeLibrary
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then
|
|
||||||
if [[ -f /opt/riscv-cross-env/bin/activate ]]; then
|
|
||||||
# shellcheck disable=SC1091
|
|
||||||
source /opt/riscv-cross-env/bin/activate
|
|
||||||
else
|
|
||||||
echo "Activation file not found"
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
export CMAKE_CROSSCOMPILING=TRUE
|
|
||||||
export CMAKE_SYSTEM_NAME=Linux
|
|
||||||
export CMAKE_SYSTEM_PROCESSOR=riscv64
|
|
||||||
|
|
||||||
export USE_CUDA=0
|
|
||||||
export USE_MKLDNN=0
|
|
||||||
|
|
||||||
export SLEEF_TARGET_EXEC_USE_QEMU=ON
|
|
||||||
sudo chown -R jenkins /var/lib/jenkins/workspace /opt
|
|
||||||
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then
|
||||||
POSSIBLE_JAVA_HOMES=()
|
POSSIBLE_JAVA_HOMES=()
|
||||||
POSSIBLE_JAVA_HOMES+=(/usr/local)
|
POSSIBLE_JAVA_HOMES+=(/usr/local)
|
||||||
@ -173,7 +155,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
|||||||
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
source /opt/intel/oneapi/mpi/latest/env/vars.sh
|
||||||
# Enable XCCL build
|
# Enable XCCL build
|
||||||
export USE_XCCL=1
|
export USE_XCCL=1
|
||||||
export USE_MPI=0
|
|
||||||
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
|
# XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA
|
||||||
export USE_KINETO=0
|
export USE_KINETO=0
|
||||||
export TORCH_XPU_ARCH_LIST=pvc
|
export TORCH_XPU_ARCH_LIST=pvc
|
||||||
@ -195,16 +176,8 @@ fi
|
|||||||
|
|
||||||
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
# We only build FlashAttention files for CUDA 8.0+, and they require large amounts of
|
||||||
# memory to build and will OOM
|
# memory to build and will OOM
|
||||||
|
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && [[ 1 -eq $(echo "${TORCH_CUDA_ARCH_LIST} >= 8.0" | bc) ]]; then
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && echo "${TORCH_CUDA_ARCH_LIST}" | tr ' ' '\n' | sed 's/$/>= 8.0/' | bc | grep -q 1; then
|
export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j 2"
|
||||||
J=2 # default to 2 jobs
|
|
||||||
case "$RUNNER" in
|
|
||||||
linux.12xlarge.memory|linux.24xlarge.memory)
|
|
||||||
J=24
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
echo "Building FlashAttention with job limit $J"
|
|
||||||
export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j ${J}"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
|
if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then
|
||||||
@ -219,6 +192,7 @@ if [[ "$BUILD_ENVIRONMENT" == *-clang*-asan* ]]; then
|
|||||||
export USE_ASAN=1
|
export USE_ASAN=1
|
||||||
export REL_WITH_DEB_INFO=1
|
export REL_WITH_DEB_INFO=1
|
||||||
export UBSAN_FLAGS="-fno-sanitize-recover=all"
|
export UBSAN_FLAGS="-fno-sanitize-recover=all"
|
||||||
|
unset USE_LLVM
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
|
if [[ "${BUILD_ENVIRONMENT}" == *no-ops* ]]; then
|
||||||
@ -239,7 +213,7 @@ fi
|
|||||||
|
|
||||||
# Do not change workspace permissions for ROCm and s390x CI jobs
|
# Do not change workspace permissions for ROCm and s390x CI jobs
|
||||||
# as it can leave workspace with bad permissions for cancelled jobs
|
# as it can leave workspace with bad permissions for cancelled jobs
|
||||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && -d /var/lib/jenkins/workspace ]]; then
|
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then
|
||||||
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
|
# Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96)
|
||||||
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
|
WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace")
|
||||||
cleanup_workspace() {
|
cleanup_workspace() {
|
||||||
@ -284,20 +258,30 @@ else
|
|||||||
# XLA test build fails when WERROR=1
|
# XLA test build fails when WERROR=1
|
||||||
# set only when building other architectures
|
# set only when building other architectures
|
||||||
# or building non-XLA tests.
|
# or building non-XLA tests.
|
||||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *xla* && "$BUILD_ENVIRONMENT" != *riscv64* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" != *rocm* &&
|
||||||
|
"$BUILD_ENVIRONMENT" != *xla* ]]; then
|
||||||
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
# Install numpy-2.0.2 for builds which are backward compatible with 1.X
|
||||||
python -mpip install numpy==2.0.2
|
python -mpip install numpy==2.0.2
|
||||||
|
|
||||||
WERROR=1 python setup.py clean
|
WERROR=1 python setup.py clean
|
||||||
|
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
python3 tools/packaging/split_wheel.py bdist_wheel
|
||||||
|
else
|
||||||
WERROR=1 python setup.py bdist_wheel
|
WERROR=1 python setup.py bdist_wheel
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
python setup.py clean
|
python setup.py clean
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *xla* ]]; then
|
||||||
source .ci/pytorch/install_cache_xla.sh
|
source .ci/pytorch/install_cache_xla.sh
|
||||||
fi
|
fi
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
echo "USE_SPLIT_BUILD cannot be used with xla or rocm"
|
||||||
|
exit 1
|
||||||
|
else
|
||||||
python setup.py bdist_wheel
|
python setup.py bdist_wheel
|
||||||
fi
|
fi
|
||||||
|
fi
|
||||||
pip_install_whl "$(echo dist/*.whl)"
|
pip_install_whl "$(echo dist/*.whl)"
|
||||||
|
|
||||||
if [[ "${BUILD_ADDITIONAL_PACKAGES:-}" == *vision* ]]; then
|
if [[ "${BUILD_ADDITIONAL_PACKAGES:-}" == *vision* ]]; then
|
||||||
@ -421,7 +405,7 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]];
|
|||||||
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
|
# don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build
|
||||||
python tools/stats/export_test_times.py
|
python tools/stats/export_test_times.py
|
||||||
fi
|
fi
|
||||||
# don't do this for bazel or s390x or riscv64 as they don't use sccache
|
# don't do this for bazel or s390x as they don't use sccache
|
||||||
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then
|
||||||
print_sccache_stats
|
print_sccache_stats
|
||||||
fi
|
fi
|
||||||
|
|||||||
@ -67,7 +67,7 @@ fi
|
|||||||
# wheels with cxx11-abi
|
# wheels with cxx11-abi
|
||||||
|
|
||||||
echo "Checking that the gcc ABI is what we expect"
|
echo "Checking that the gcc ABI is what we expect"
|
||||||
if [[ "$(uname)" != 'Darwin' && "$(uname -m)" != "s390x" ]]; then
|
if [[ "$(uname)" != 'Darwin' ]]; then
|
||||||
# We also check that there are cxx11 symbols in libtorch
|
# We also check that there are cxx11 symbols in libtorch
|
||||||
#
|
#
|
||||||
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
echo "Checking that symbols in libtorch.so have the right gcc abi"
|
||||||
@ -300,3 +300,24 @@ except RuntimeError as e:
|
|||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Check for C++ ABI compatibility to GCC-11 - GCC 13
|
||||||
|
###############################################################################
|
||||||
|
if [[ "$(uname)" == 'Linux' && "$PACKAGE_TYPE" == 'manywheel' ]]; then
|
||||||
|
pushd /tmp
|
||||||
|
# Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html
|
||||||
|
# gcc-11 is ABI16, gcc-13 is ABI18, gcc-14 is ABI19
|
||||||
|
# gcc 11 - CUDA 11.8, xpu, rocm
|
||||||
|
# gcc 13 - CUDA 12.6, 12.8 and cpu
|
||||||
|
# Please see issue for reference: https://github.com/pytorch/pytorch/issues/152426
|
||||||
|
if [[ "$(uname -m)" == "s390x" ]]; then
|
||||||
|
cxx_abi="19"
|
||||||
|
elif [[ "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'rocm'* ]]; then
|
||||||
|
cxx_abi="18"
|
||||||
|
else
|
||||||
|
cxx_abi="16"
|
||||||
|
fi
|
||||||
|
python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi10${cxx_abi}' else 1)"
|
||||||
|
popd
|
||||||
|
fi
|
||||||
|
|||||||
@ -149,19 +149,6 @@ function get_pinned_commit() {
|
|||||||
cat .github/ci_commit_pins/"${1}".txt
|
cat .github/ci_commit_pins/"${1}".txt
|
||||||
}
|
}
|
||||||
|
|
||||||
function detect_cuda_arch() {
|
|
||||||
if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then
|
|
||||||
if command -v nvidia-smi; then
|
|
||||||
TORCH_CUDA_ARCH_LIST=$(nvidia-smi --query-gpu=compute_cap --format=csv | tail -n 1)
|
|
||||||
elif [[ "${TEST_CONFIG}" == *nogpu* ]]; then
|
|
||||||
# There won't be nvidia-smi in nogpu tests, so just set TORCH_CUDA_ARCH_LIST to the default
|
|
||||||
# minimum supported value here
|
|
||||||
TORCH_CUDA_ARCH_LIST=8.0
|
|
||||||
fi
|
|
||||||
export TORCH_CUDA_ARCH_LIST
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
function install_torchaudio() {
|
function install_torchaudio() {
|
||||||
local commit
|
local commit
|
||||||
commit=$(get_pinned_commit audio)
|
commit=$(get_pinned_commit audio)
|
||||||
@ -284,7 +271,7 @@ function install_torchrec_and_fbgemm() {
|
|||||||
|
|
||||||
function clone_pytorch_xla() {
|
function clone_pytorch_xla() {
|
||||||
if [[ ! -d ./xla ]]; then
|
if [[ ! -d ./xla ]]; then
|
||||||
git clone --recursive -b r2.9 https://github.com/pytorch/xla.git
|
git clone --recursive --quiet https://github.com/pytorch/xla.git
|
||||||
pushd xla
|
pushd xla
|
||||||
# pin the xla hash so that we don't get broken by changes to xla
|
# pin the xla hash so that we don't get broken by changes to xla
|
||||||
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
|
git checkout "$(cat ../.github/ci_commit_pins/xla.txt)"
|
||||||
|
|||||||
@ -58,7 +58,7 @@ time python tools/setup_helpers/generate_code.py \
|
|||||||
|
|
||||||
# Build the docs
|
# Build the docs
|
||||||
pushd docs/cpp
|
pushd docs/cpp
|
||||||
time make VERBOSE=1 html
|
time make VERBOSE=1 html -j
|
||||||
|
|
||||||
popd
|
popd
|
||||||
popd
|
popd
|
||||||
|
|||||||
@ -174,15 +174,10 @@ checkout_install_torchbench() {
|
|||||||
# to install and test other models
|
# to install and test other models
|
||||||
python install.py --continue_on_fail
|
python install.py --continue_on_fail
|
||||||
fi
|
fi
|
||||||
popd
|
|
||||||
|
|
||||||
pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt
|
|
||||||
# https://github.com/pytorch/pytorch/issues/160689 to remove torchao because
|
|
||||||
# its current version 0.12.0 doesn't work with transformers 4.54.0
|
|
||||||
pip uninstall -y torchao
|
|
||||||
|
|
||||||
echo "Print all dependencies after TorchBench is installed"
|
echo "Print all dependencies after TorchBench is installed"
|
||||||
python -mpip freeze
|
python -mpip freeze
|
||||||
|
popd
|
||||||
}
|
}
|
||||||
|
|
||||||
torchbench_setup_macos() {
|
torchbench_setup_macos() {
|
||||||
@ -195,7 +190,7 @@ torchbench_setup_macos() {
|
|||||||
git checkout "$(cat ../.github/ci_commit_pins/vision.txt)"
|
git checkout "$(cat ../.github/ci_commit_pins/vision.txt)"
|
||||||
git submodule update --init --recursive
|
git submodule update --init --recursive
|
||||||
python setup.py clean
|
python setup.py clean
|
||||||
python -m pip install -e . -v --no-build-isolation
|
python setup.py develop
|
||||||
popd
|
popd
|
||||||
|
|
||||||
pushd torchaudio
|
pushd torchaudio
|
||||||
@ -204,7 +199,7 @@ torchbench_setup_macos() {
|
|||||||
git submodule update --init --recursive
|
git submodule update --init --recursive
|
||||||
python setup.py clean
|
python setup.py clean
|
||||||
#TODO: Remove me, when figure out how to make TorchAudio find brew installed openmp
|
#TODO: Remove me, when figure out how to make TorchAudio find brew installed openmp
|
||||||
USE_OPENMP=0 python -m pip install -e . -v --no-build-isolation
|
USE_OPENMP=0 python setup.py develop
|
||||||
popd
|
popd
|
||||||
|
|
||||||
checkout_install_torchbench
|
checkout_install_torchbench
|
||||||
@ -302,47 +297,6 @@ test_torchbench_smoketest() {
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
done
|
done
|
||||||
echo "Pytorch benchmark on mps device completed"
|
|
||||||
}
|
|
||||||
|
|
||||||
test_aoti_torchbench_smoketest() {
|
|
||||||
print_cmake_info
|
|
||||||
|
|
||||||
echo "Launching AOTInductor torchbench setup"
|
|
||||||
pip_benchmark_deps
|
|
||||||
# shellcheck disable=SC2119,SC2120
|
|
||||||
torchbench_setup_macos
|
|
||||||
|
|
||||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
|
||||||
mkdir -p "$TEST_REPORTS_DIR"
|
|
||||||
|
|
||||||
local device=mps
|
|
||||||
local dtypes=(undefined float16 bfloat16 notset)
|
|
||||||
local dtype=${dtypes[$1]}
|
|
||||||
local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16)
|
|
||||||
|
|
||||||
echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}"
|
|
||||||
local dtype_arg="--${dtype}"
|
|
||||||
if [ "$dtype" == notset ]; then
|
|
||||||
dtype_arg="--float32"
|
|
||||||
fi
|
|
||||||
touch "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv"
|
|
||||||
for model in "${models[@]}"; do
|
|
||||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
|
||||||
--performance --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
|
|
||||||
--output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv" || true
|
|
||||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \
|
|
||||||
--accuracy --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
|
|
||||||
--output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_accuracy.csv" || true
|
|
||||||
done
|
|
||||||
|
|
||||||
echo "Launching HuggingFace inference performance run for AOT Inductor and dtype ${dtype}"
|
|
||||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \
|
|
||||||
--performance --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
|
|
||||||
--output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_performance.csv" || true
|
|
||||||
PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \
|
|
||||||
--accuracy --export-aot-inductor --inference --devices "$device" "$dtype_arg" \
|
|
||||||
--output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_accuracy.csv" || true
|
|
||||||
|
|
||||||
echo "Pytorch benchmark on mps device completed"
|
echo "Pytorch benchmark on mps device completed"
|
||||||
}
|
}
|
||||||
@ -391,8 +345,6 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then
|
|||||||
test_timm_perf
|
test_timm_perf
|
||||||
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
|
elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then
|
||||||
test_torchbench_smoketest "${SHARD_NUMBER}"
|
test_torchbench_smoketest "${SHARD_NUMBER}"
|
||||||
elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then
|
|
||||||
test_aoti_torchbench_smoketest "${SHARD_NUMBER}"
|
|
||||||
elif [[ $TEST_CONFIG == *"mps"* ]]; then
|
elif [[ $TEST_CONFIG == *"mps"* ]]; then
|
||||||
test_python_mps
|
test_python_mps
|
||||||
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
|
elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then
|
||||||
|
|||||||
@ -45,7 +45,6 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then
|
|||||||
# DTensor tests
|
# DTensor tests
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
|
time python test/run_test.py --verbose -i distributed/tensor/test_random_ops
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
|
time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile
|
||||||
time python test/run_test.py --verbose -i distributed/tensor/test_utils.py
|
|
||||||
|
|
||||||
# DeviceMesh test
|
# DeviceMesh test
|
||||||
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
time python test/run_test.py --verbose -i distributed/test_device_mesh
|
||||||
|
|||||||
@ -1,25 +0,0 @@
|
|||||||
From 6e08c9d08e9de59c7af28b720289debbbd384764 Mon Sep 17 00:00:00 2001
|
|
||||||
From: Michael Wang <13521008+isVoid@users.noreply.github.com>
|
|
||||||
Date: Tue, 1 Apr 2025 17:28:05 -0700
|
|
||||||
Subject: [PATCH] Avoid bumping certain driver API to avoid future breakage
|
|
||||||
(#185)
|
|
||||||
|
|
||||||
Co-authored-by: isVoid <isVoid@users.noreply.github.com>
|
|
||||||
---
|
|
||||||
numba_cuda/numba/cuda/cudadrv/driver.py | 3 +++
|
|
||||||
1 file changed, 3 insertions(+)
|
|
||||||
|
|
||||||
diff --git a/numba_cuda/numba/cuda/cudadrv/driver.py b/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
index 1641bf77..233e9ed7 100644
|
|
||||||
--- a/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
+++ b/numba_cuda/numba/cuda/cudadrv/driver.py
|
|
||||||
@@ -365,6 +365,9 @@ def _find_api(self, fname):
|
|
||||||
else:
|
|
||||||
variants = ('_v2', '')
|
|
||||||
|
|
||||||
+ if fname in ("cuCtxGetDevice", "cuCtxSynchronize"):
|
|
||||||
+ return getattr(self.lib, fname)
|
|
||||||
+
|
|
||||||
for variant in variants:
|
|
||||||
try:
|
|
||||||
return getattr(self.lib, f'{fname}{variant}')
|
|
||||||
@ -32,9 +32,6 @@ LIBTORCH_NAMESPACE_LIST = (
|
|||||||
"torch::",
|
"torch::",
|
||||||
)
|
)
|
||||||
|
|
||||||
# Patterns for detecting statically linked libstdc++ symbols
|
|
||||||
STATICALLY_LINKED_CXX11_ABI = [re.compile(r".*recursive_directory_iterator.*")]
|
|
||||||
|
|
||||||
|
|
||||||
def _apply_libtorch_symbols(symbols):
|
def _apply_libtorch_symbols(symbols):
|
||||||
return [
|
return [
|
||||||
@ -56,17 +53,12 @@ def get_symbols(lib: str) -> list[tuple[str, str, str]]:
|
|||||||
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
|
return [x.split(" ", 2) for x in lines.decode("latin1").split("\n")[:-1]]
|
||||||
|
|
||||||
|
|
||||||
def grep_symbols(
|
def grep_symbols(lib: str, patterns: list[Any]) -> list[str]:
|
||||||
lib: str, patterns: list[Any], symbol_type: str | None = None
|
|
||||||
) -> list[str]:
|
|
||||||
def _grep_symbols(
|
def _grep_symbols(
|
||||||
symbols: list[tuple[str, str, str]], patterns: list[Any]
|
symbols: list[tuple[str, str, str]], patterns: list[Any]
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
rc = []
|
rc = []
|
||||||
for _s_addr, _s_type, s_name in symbols:
|
for _s_addr, _s_type, s_name in symbols:
|
||||||
# Filter by symbol type if specified
|
|
||||||
if symbol_type and _s_type != symbol_type:
|
|
||||||
continue
|
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
if pattern.match(s_name):
|
if pattern.match(s_name):
|
||||||
rc.append(s_name)
|
rc.append(s_name)
|
||||||
@ -88,18 +80,6 @@ def grep_symbols(
|
|||||||
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
|
return functools.reduce(list.__add__, (x.result() for x in tasks), [])
|
||||||
|
|
||||||
|
|
||||||
def check_lib_statically_linked_libstdc_cxx_abi_symbols(lib: str) -> None:
|
|
||||||
cxx11_statically_linked_symbols = grep_symbols(
|
|
||||||
lib, STATICALLY_LINKED_CXX11_ABI, symbol_type="T"
|
|
||||||
)
|
|
||||||
num_statically_linked_symbols = len(cxx11_statically_linked_symbols)
|
|
||||||
print(f"num_statically_linked_symbols (T): {num_statically_linked_symbols}")
|
|
||||||
if num_statically_linked_symbols > 0:
|
|
||||||
raise RuntimeError(
|
|
||||||
f"Found statically linked libstdc++ symbols (recursive_directory_iterator): {cxx11_statically_linked_symbols[:100]}"
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
|
def check_lib_symbols_for_abi_correctness(lib: str) -> None:
|
||||||
print(f"lib: {lib}")
|
print(f"lib: {lib}")
|
||||||
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
|
cxx11_symbols = grep_symbols(lib, LIBTORCH_CXX11_PATTERNS)
|
||||||
@ -127,7 +107,6 @@ def main() -> None:
|
|||||||
|
|
||||||
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
|
libtorch_cpu_path = str(install_root / "lib" / "libtorch_cpu.so")
|
||||||
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
|
check_lib_symbols_for_abi_correctness(libtorch_cpu_path)
|
||||||
check_lib_statically_linked_libstdc_cxx_abi_symbols(libtorch_cpu_path)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@ -32,16 +32,6 @@ if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /v
|
|||||||
git config --global --add safe.directory /var/lib/jenkins/workspace
|
git config --global --add safe.directory /var/lib/jenkins/workspace
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
# Patch numba to avoid CUDA-13 crash, see https://github.com/pytorch/pytorch/issues/162878
|
|
||||||
NUMBA_CUDA_DIR=$(python -c "import os;import numba.cuda; print(os.path.dirname(numba.cuda.__file__))" 2>/dev/null || true)
|
|
||||||
if [ -n "$NUMBA_CUDA_DIR" ]; then
|
|
||||||
NUMBA_PATCH="$(dirname "$(realpath "${BASH_SOURCE[0]}")")/numba-cuda-13.patch"
|
|
||||||
pushd "$NUMBA_CUDA_DIR"
|
|
||||||
patch -p4 <"$NUMBA_PATCH"
|
|
||||||
popd
|
|
||||||
fi
|
|
||||||
|
|
||||||
echo "Environment variables:"
|
echo "Environment variables:"
|
||||||
env
|
env
|
||||||
|
|
||||||
@ -101,7 +91,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then
|
|||||||
export VALGRIND=OFF
|
export VALGRIND=OFF
|
||||||
fi
|
fi
|
||||||
|
|
||||||
detect_cuda_arch
|
|
||||||
|
|
||||||
if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
|
if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then
|
||||||
# There are additional warnings on s390x, maybe due to newer gcc.
|
# There are additional warnings on s390x, maybe due to newer gcc.
|
||||||
@ -506,14 +495,6 @@ test_inductor_cpp_wrapper_shard() {
|
|||||||
-k 'take' \
|
-k 'take' \
|
||||||
--shard "$1" "$NUM_TEST_SHARDS" \
|
--shard "$1" "$NUM_TEST_SHARDS" \
|
||||||
--verbose
|
--verbose
|
||||||
|
|
||||||
if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then
|
|
||||||
python test/run_test.py \
|
|
||||||
--include inductor/test_mkldnn_pattern_matcher \
|
|
||||||
-k 'xpu' \
|
|
||||||
--shard "$1" "$NUM_TEST_SHARDS" \
|
|
||||||
--verbose
|
|
||||||
fi
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
|
# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
|
||||||
@ -1070,10 +1051,20 @@ test_libtorch_api() {
|
|||||||
mkdir -p $TEST_REPORTS_DIR
|
mkdir -p $TEST_REPORTS_DIR
|
||||||
|
|
||||||
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
|
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" "$TORCH_BIN_DIR"/test_api --gtest_filter='-IMethodTest.*' --gtest_output=xml:$TEST_REPORTS_DIR/test_api.xml
|
||||||
|
"$TORCH_BIN_DIR"/test_tensorexpr --gtest_output=xml:$TEST_REPORTS_DIR/test_tensorexpr.xml
|
||||||
else
|
else
|
||||||
# Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
|
# Exclude IMethodTest that relies on torch::deploy, which will instead be ran in test_deploy
|
||||||
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
|
OMP_NUM_THREADS=2 TORCH_CPP_TEST_MNIST_PATH="${MNIST_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_api -k "not IMethodTest"
|
||||||
|
|
||||||
|
# On s390x, pytorch is built without llvm.
|
||||||
|
# Even if it would be built with llvm, llvm currently doesn't support used features on s390x and
|
||||||
|
# test fails with errors like:
|
||||||
|
# JIT session error: Unsupported target machine architecture in ELF object pytorch-jitted-objectbuffer
|
||||||
|
# unknown file: Failure
|
||||||
|
# C++ exception with description "valOrErr INTERNAL ASSERT FAILED at "/var/lib/jenkins/workspace/torch/csrc/jit/tensorexpr/llvm_jit.h":34, please report a bug to PyTorch. Unexpected failure in LLVM JIT: Failed to materialize symbols: { (main, { func }) }
|
||||||
|
if [[ "${BUILD_ENVIRONMENT}" != *s390x* ]]; then
|
||||||
|
python test/run_test.py --cpp --verbose -i cpp/test_tensorexpr
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# quantization is not fully supported on s390x yet
|
# quantization is not fully supported on s390x yet
|
||||||
@ -1624,25 +1615,6 @@ test_operator_benchmark() {
|
|||||||
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
--expected "expected_ci_operator_benchmark_eager_float32_cpu.csv"
|
||||||
}
|
}
|
||||||
|
|
||||||
test_operator_microbenchmark() {
|
|
||||||
TEST_REPORTS_DIR=$(pwd)/test/test-reports
|
|
||||||
mkdir -p "$TEST_REPORTS_DIR"
|
|
||||||
TEST_DIR=$(pwd)
|
|
||||||
|
|
||||||
cd benchmarks/operator_benchmark/pt_extension
|
|
||||||
python -m pip install .
|
|
||||||
|
|
||||||
cd "${TEST_DIR}"/benchmarks/operator_benchmark
|
|
||||||
|
|
||||||
for OP_BENCHMARK_TESTS in matmul mm addmm bmm; do
|
|
||||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
|
||||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}_compile.json" \
|
|
||||||
--benchmark-name "PyTorch operator microbenchmark" --use-compile
|
|
||||||
$TASKSET python -m pt.${OP_BENCHMARK_TESTS}_test --tag-filter long \
|
|
||||||
--output-json-for-dashboard "${TEST_REPORTS_DIR}/operator_microbenchmark_${OP_BENCHMARK_TESTS}.json" \
|
|
||||||
--benchmark-name "PyTorch operator microbenchmark"
|
|
||||||
done
|
|
||||||
}
|
|
||||||
|
|
||||||
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
|
if ! [[ "${BUILD_ENVIRONMENT}" == *libtorch* || "${BUILD_ENVIRONMENT}" == *-bazel-* ]]; then
|
||||||
(cd test && python -c "import torch; print(torch.__config__.show())")
|
(cd test && python -c "import torch; print(torch.__config__.show())")
|
||||||
@ -1667,10 +1639,6 @@ elif [[ "${TEST_CONFIG}" == *xla* ]]; then
|
|||||||
install_torchvision
|
install_torchvision
|
||||||
build_xla
|
build_xla
|
||||||
test_xla
|
test_xla
|
||||||
elif [[ "$TEST_CONFIG" == *vllm* ]]; then
|
|
||||||
echo "vLLM CI uses TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST"
|
|
||||||
(cd .ci/lumen_cli && python -m pip install -e .)
|
|
||||||
python -m cli.run test external vllm --test-plan "$TEST_CONFIG" --shard-id "$SHARD_NUMBER" --num-shards "$NUM_TEST_SHARDS"
|
|
||||||
elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
|
elif [[ "${TEST_CONFIG}" == *executorch* ]]; then
|
||||||
test_executorch
|
test_executorch
|
||||||
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
|
elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then
|
||||||
@ -1697,8 +1665,6 @@ elif [[ "${TEST_CONFIG}" == *operator_benchmark* ]]; then
|
|||||||
test_operator_benchmark cpu ${TEST_MODE}
|
test_operator_benchmark cpu ${TEST_MODE}
|
||||||
|
|
||||||
fi
|
fi
|
||||||
elif [[ "${TEST_CONFIG}" == *operator_microbenchmark* ]]; then
|
|
||||||
test_operator_microbenchmark
|
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor_distributed* ]]; then
|
||||||
test_inductor_distributed
|
test_inductor_distributed
|
||||||
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor-halide* ]]; then
|
||||||
@ -1726,6 +1692,7 @@ elif [[ "${TEST_CONFIG}" == verify_cachebench ]]; then
|
|||||||
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
|
||||||
install_torchaudio
|
install_torchaudio
|
||||||
install_torchvision
|
install_torchvision
|
||||||
|
install_torchao
|
||||||
id=$((SHARD_NUMBER-1))
|
id=$((SHARD_NUMBER-1))
|
||||||
# https://github.com/opencv/opencv-python/issues/885
|
# https://github.com/opencv/opencv-python/issues/885
|
||||||
pip_install opencv-python==4.8.0.74
|
pip_install opencv-python==4.8.0.74
|
||||||
@ -1752,6 +1719,11 @@ elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper* ]]; then
|
|||||||
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
elif [[ "${TEST_CONFIG}" == *inductor* ]]; then
|
||||||
install_torchvision
|
install_torchvision
|
||||||
test_inductor_shard "${SHARD_NUMBER}"
|
test_inductor_shard "${SHARD_NUMBER}"
|
||||||
|
if [[ "${SHARD_NUMBER}" == 1 ]]; then
|
||||||
|
if [[ "${BUILD_ENVIRONMENT}" != linux-jammy-py3.9-gcc11-build ]]; then
|
||||||
|
test_inductor_distributed
|
||||||
|
fi
|
||||||
|
fi
|
||||||
elif [[ "${TEST_CONFIG}" == *einops* ]]; then
|
elif [[ "${TEST_CONFIG}" == *einops* ]]; then
|
||||||
test_einops
|
test_einops
|
||||||
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
|
elif [[ "${TEST_CONFIG}" == *dynamo_wrapped* ]]; then
|
||||||
|
|||||||
@ -61,10 +61,9 @@ if "%USE_XPU%"=="1" (
|
|||||||
call "C:\Program Files (x86)\Intel\oneAPI\compiler\latest\env\vars.bat"
|
call "C:\Program Files (x86)\Intel\oneAPI\compiler\latest\env\vars.bat"
|
||||||
call "C:\Program Files (x86)\Intel\oneAPI\ocloc\latest\env\vars.bat"
|
call "C:\Program Files (x86)\Intel\oneAPI\ocloc\latest\env\vars.bat"
|
||||||
if errorlevel 1 exit /b 1
|
if errorlevel 1 exit /b 1
|
||||||
:: Reduce build time
|
:: Reduce build time. Only have MTL self-hosted runner now
|
||||||
SET TORCH_XPU_ARCH_LIST=bmg
|
SET TORCH_XPU_ARCH_LIST=xe-lpg
|
||||||
:: Re-setup python env for build
|
SET USE_KINETO=0
|
||||||
call pip install -r requirements.txt
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@echo on
|
@echo on
|
||||||
@ -137,7 +136,7 @@ sccache --show-stats
|
|||||||
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
python -c "import os, glob; os.system('python -mpip install --no-index --no-deps ' + glob.glob('dist/*.whl')[0])"
|
||||||
(
|
(
|
||||||
if "%BUILD_ENVIRONMENT%"=="" (
|
if "%BUILD_ENVIRONMENT%"=="" (
|
||||||
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%\envs\py_tmp` in Command Prompt before running Git Bash.
|
echo NOTE: To run `import torch`, please make sure to activate the conda environment by running `call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3` in Command Prompt before running Git Bash.
|
||||||
) else (
|
) else (
|
||||||
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
copy /Y "dist\*.whl" "%PYTORCH_FINAL_PACKAGE_DIR%"
|
||||||
|
|
||||||
|
|||||||
@ -3,12 +3,12 @@ if "%BUILD_ENVIRONMENT%"=="" (
|
|||||||
) else (
|
) else (
|
||||||
set CONDA_PARENT_DIR=C:\Jenkins
|
set CONDA_PARENT_DIR=C:\Jenkins
|
||||||
)
|
)
|
||||||
set CONDA_ROOT_DIR=%CONDA_PARENT_DIR%\Miniconda3
|
|
||||||
|
|
||||||
:: Be conservative here when rolling out the new AMI with conda. This will try
|
:: Be conservative here when rolling out the new AMI with conda. This will try
|
||||||
:: to install conda as before if it couldn't find the conda installation. This
|
:: to install conda as before if it couldn't find the conda installation. This
|
||||||
:: can be removed eventually after we gain enough confidence in the AMI
|
:: can be removed eventually after we gain enough confidence in the AMI
|
||||||
if not exist %CONDA_ROOT_DIR% (
|
if not exist %CONDA_PARENT_DIR%\Miniconda3 (
|
||||||
set INSTALL_FRESH_CONDA=1
|
set INSTALL_FRESH_CONDA=1
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -17,14 +17,10 @@ if "%INSTALL_FRESH_CONDA%"=="1" (
|
|||||||
if errorlevel 1 exit /b
|
if errorlevel 1 exit /b
|
||||||
if not errorlevel 0 exit /b
|
if not errorlevel 0 exit /b
|
||||||
|
|
||||||
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_ROOT_DIR%
|
%TMP_DIR_WIN%\Miniconda3-latest-Windows-x86_64.exe /InstallationType=JustMe /RegisterPython=0 /S /AddToPath=0 /D=%CONDA_PARENT_DIR%\Miniconda3
|
||||||
if errorlevel 1 exit /b
|
if errorlevel 1 exit /b
|
||||||
if not errorlevel 0 exit /b
|
if not errorlevel 0 exit /b
|
||||||
)
|
)
|
||||||
|
|
||||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
||||||
call %CONDA_ROOT_DIR%\Scripts\activate.bat %CONDA_ROOT_DIR%
|
call %CONDA_PARENT_DIR%\Miniconda3\Scripts\activate.bat %CONDA_PARENT_DIR%\Miniconda3
|
||||||
:: Activate conda so that we can use its commands, i.e. conda, python, pip
|
|
||||||
call conda activate py_tmp
|
|
||||||
|
|
||||||
call pip install -r .ci/docker/requirements-ci.txt
|
|
||||||
|
|||||||
@ -14,7 +14,7 @@ if not errorlevel 0 exit /b
|
|||||||
:: build\torch. Rather than changing all these references, making a copy of torch folder
|
:: build\torch. Rather than changing all these references, making a copy of torch folder
|
||||||
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
:: from conda to the current workspace is easier. The workspace will be cleaned up after
|
||||||
:: the job anyway
|
:: the job anyway
|
||||||
xcopy /s %CONDA_ROOT_DIR%\envs\py_tmp\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
xcopy /s %CONDA_PARENT_DIR%\Miniconda3\Lib\site-packages\torch %TMP_DIR_WIN%\build\torch\
|
||||||
|
|
||||||
pushd .
|
pushd .
|
||||||
if "%VC_VERSION%" == "" (
|
if "%VC_VERSION%" == "" (
|
||||||
|
|||||||
@ -38,20 +38,13 @@ if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
|||||||
fi
|
fi
|
||||||
|
|
||||||
# TODO: Move both of them to Windows AMI
|
# TODO: Move both of them to Windows AMI
|
||||||
python -m pip install tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard==2.13.0 protobuf==5.29.4 pytest-subtests==0.13.1
|
||||||
|
|
||||||
# Copied from https://github.com/pytorch/test-infra/blob/be01a40157c36cd5a48391fdf44a7bc3ebd4c7e3/aws/ami/windows/scripts/Installers/Install-Pip-Dependencies.ps1#L16 with some adjustments
|
|
||||||
# pytest-rerunfailures==10.3 as 10.2 fails with INTERNALERROR> pluggy._manager.PluginValidationError: unknown hook 'pytest_configure_node'
|
|
||||||
# scipy from 1.6.3 to 1.10
|
|
||||||
# expecttest from 0.1.3 to 0.3.0
|
|
||||||
# xdoctest from 1.0.2 to 1.3.0
|
|
||||||
python -m pip install "future==0.18.2" "hypothesis==5.35.1" "expecttest==0.3.0" "librosa>=0.6.2" "scipy==1.10.1" "psutil==5.9.1" "pynvml==11.4.1" "pillow==9.2.0" "unittest-xml-reporting<=3.2.0,>=2.0.0" "pytest==7.1.3" "pytest-xdist==2.5.0" "pytest-flakefinder==1.1.0" "pytest-rerunfailures==10.3" "pytest-shard==0.1.2" "sympy==1.11.1" "xdoctest==1.3.0" "pygments==2.12.0" "opt-einsum>=3.3" "networkx==2.8.8" "mpmath==1.2.1" "pytest-cpp==2.3.0" "boto3==1.35.42"
|
|
||||||
|
|
||||||
# Install Z3 optional dependency for Windows builds.
|
# Install Z3 optional dependency for Windows builds.
|
||||||
python -m pip install z3-solver==4.15.1.0
|
python -m pip install z3-solver==4.15.1.0
|
||||||
|
|
||||||
# Install tlparse for test\dynamo\test_structured_trace.py UTs.
|
# Install tlparse for test\dynamo\test_structured_trace.py UTs.
|
||||||
python -m pip install tlparse==0.4.0
|
python -m pip install tlparse==0.3.30
|
||||||
|
|
||||||
# Install parameterized
|
# Install parameterized
|
||||||
python -m pip install parameterized==0.8.1
|
python -m pip install parameterized==0.8.1
|
||||||
@ -59,6 +52,9 @@ python -m pip install parameterized==0.8.1
|
|||||||
# Install pulp for testing ilps under torch\distributed\_tools
|
# Install pulp for testing ilps under torch\distributed\_tools
|
||||||
python -m pip install pulp==2.9.0
|
python -m pip install pulp==2.9.0
|
||||||
|
|
||||||
|
# Install expecttest to merge https://github.com/pytorch/pytorch/pull/155308
|
||||||
|
python -m pip install expecttest==0.3.0
|
||||||
|
|
||||||
run_tests() {
|
run_tests() {
|
||||||
# Run nvidia-smi if available
|
# Run nvidia-smi if available
|
||||||
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
for path in '/c/Program Files/NVIDIA Corporation/NVSMI/nvidia-smi.exe' /c/Windows/System32/nvidia-smi.exe; do
|
||||||
|
|||||||
@ -37,7 +37,7 @@ IF "%CUDA_PATH_V126%"=="" (
|
|||||||
)
|
)
|
||||||
|
|
||||||
IF "%BUILD_VISION%" == "" (
|
IF "%BUILD_VISION%" == "" (
|
||||||
set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0
|
set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0
|
||||||
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
||||||
) ELSE (
|
) ELSE (
|
||||||
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
|
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90
|
||||||
|
|||||||
@ -37,10 +37,10 @@ IF "%CUDA_PATH_V128%"=="" (
|
|||||||
)
|
)
|
||||||
|
|
||||||
IF "%BUILD_VISION%" == "" (
|
IF "%BUILD_VISION%" == "" (
|
||||||
set TORCH_CUDA_ARCH_LIST=7.0;7.5;8.0;8.6;9.0;10.0;12.0
|
set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0;10.0;12.0
|
||||||
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
||||||
) ELSE (
|
) ELSE (
|
||||||
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
|
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
|
||||||
)
|
)
|
||||||
|
|
||||||
set "CUDA_PATH=%CUDA_PATH_V128%"
|
set "CUDA_PATH=%CUDA_PATH_V128%"
|
||||||
|
|||||||
@ -1,59 +0,0 @@
|
|||||||
@echo off
|
|
||||||
|
|
||||||
set MODULE_NAME=pytorch
|
|
||||||
|
|
||||||
IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" (
|
|
||||||
call internal\clone.bat
|
|
||||||
cd %~dp0
|
|
||||||
) ELSE (
|
|
||||||
call internal\clean.bat
|
|
||||||
)
|
|
||||||
IF ERRORLEVEL 1 goto :eof
|
|
||||||
|
|
||||||
call internal\check_deps.bat
|
|
||||||
IF ERRORLEVEL 1 goto :eof
|
|
||||||
|
|
||||||
REM Check for optional components
|
|
||||||
|
|
||||||
set USE_CUDA=
|
|
||||||
set CMAKE_GENERATOR=Visual Studio 15 2017 Win64
|
|
||||||
|
|
||||||
IF "%NVTOOLSEXT_PATH%"=="" (
|
|
||||||
IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib" (
|
|
||||||
set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt
|
|
||||||
) ELSE (
|
|
||||||
echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
IF "%CUDA_PATH_V130%"=="" (
|
|
||||||
IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc.exe" (
|
|
||||||
set "CUDA_PATH_V130=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0"
|
|
||||||
) ELSE (
|
|
||||||
echo CUDA 13.0 not found, failing
|
|
||||||
exit /b 1
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
IF "%BUILD_VISION%" == "" (
|
|
||||||
set TORCH_CUDA_ARCH_LIST=7.5;8.0;8.6;9.0;10.0;12.0
|
|
||||||
set TORCH_NVCC_FLAGS=-Xfatbin -compress-all
|
|
||||||
) ELSE (
|
|
||||||
set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120
|
|
||||||
)
|
|
||||||
|
|
||||||
set "CUDA_PATH=%CUDA_PATH_V130%"
|
|
||||||
set "PATH=%CUDA_PATH_V130%\bin;%PATH%"
|
|
||||||
|
|
||||||
:optcheck
|
|
||||||
|
|
||||||
call internal\check_opts.bat
|
|
||||||
IF ERRORLEVEL 1 goto :eof
|
|
||||||
|
|
||||||
if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\..
|
|
||||||
call %~dp0\internal\copy.bat
|
|
||||||
IF ERRORLEVEL 1 goto :eof
|
|
||||||
|
|
||||||
call %~dp0\internal\setup.bat
|
|
||||||
IF ERRORLEVEL 1 goto :eof
|
|
||||||
@ -1,20 +1,12 @@
|
|||||||
|
copy "%CUDA_PATH%\bin\cusparse*64_*.dll*" pytorch\torch\lib
|
||||||
if %CUDA_VERSION% geq 130 (
|
copy "%CUDA_PATH%\bin\cublas*64_*.dll*" pytorch\torch\lib
|
||||||
set "dll_path=bin\x64"
|
copy "%CUDA_PATH%\bin\cudart*64_*.dll*" pytorch\torch\lib
|
||||||
) else (
|
copy "%CUDA_PATH%\bin\curand*64_*.dll*" pytorch\torch\lib
|
||||||
set "dll_path=bin"
|
copy "%CUDA_PATH%\bin\cufft*64_*.dll*" pytorch\torch\lib
|
||||||
)
|
copy "%CUDA_PATH%\bin\cusolver*64_*.dll*" pytorch\torch\lib
|
||||||
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\cusparse*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\cublas*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\cudart*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\curand*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\cufft*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\cusolver*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\nvrtc*64_*.dll*" pytorch\torch\lib
|
|
||||||
copy "%CUDA_PATH%\%dll_path%\nvJitLink_*.dll*" pytorch\torch\lib
|
|
||||||
|
|
||||||
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
|
copy "%CUDA_PATH%\bin\cudnn*64_*.dll*" pytorch\torch\lib
|
||||||
|
copy "%CUDA_PATH%\bin\nvrtc*64_*.dll*" pytorch\torch\lib
|
||||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
|
copy "%CUDA_PATH%\extras\CUPTI\lib64\cupti64_*.dll*" pytorch\torch\lib
|
||||||
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
|
copy "%CUDA_PATH%\extras\CUPTI\lib64\nvperf_host*.dll*" pytorch\torch\lib
|
||||||
|
|
||||||
@ -28,3 +20,8 @@ copy "%libuv_ROOT%\bin\uv.dll" pytorch\torch\lib
|
|||||||
if exist "C:\Windows\System32\zlibwapi.dll" (
|
if exist "C:\Windows\System32\zlibwapi.dll" (
|
||||||
copy "C:\Windows\System32\zlibwapi.dll" pytorch\torch\lib
|
copy "C:\Windows\System32\zlibwapi.dll" pytorch\torch\lib
|
||||||
)
|
)
|
||||||
|
|
||||||
|
::copy nvJitLink dll is requires for cuda 12+
|
||||||
|
if exist "%CUDA_PATH%\bin\nvJitLink_*.dll*" (
|
||||||
|
copy "%CUDA_PATH%\bin\nvJitLink_*.dll*" pytorch\torch\lib
|
||||||
|
)
|
||||||
|
|||||||
@ -26,7 +26,6 @@ if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR%
|
|||||||
if %CUDA_VER% EQU 126 goto cuda126
|
if %CUDA_VER% EQU 126 goto cuda126
|
||||||
if %CUDA_VER% EQU 128 goto cuda128
|
if %CUDA_VER% EQU 128 goto cuda128
|
||||||
if %CUDA_VER% EQU 129 goto cuda129
|
if %CUDA_VER% EQU 129 goto cuda129
|
||||||
if %CUDA_VER% EQU 130 goto cuda130
|
|
||||||
|
|
||||||
echo CUDA %CUDA_VERSION_STR% is not supported
|
echo CUDA %CUDA_VERSION_STR% is not supported
|
||||||
exit /b 1
|
exit /b 1
|
||||||
@ -114,33 +113,6 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
|
|||||||
|
|
||||||
goto cuda_common
|
goto cuda_common
|
||||||
|
|
||||||
:cuda130
|
|
||||||
|
|
||||||
set CUDA_INSTALL_EXE=cuda_13.0.0_windows.exe
|
|
||||||
if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" (
|
|
||||||
curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" & REM @lint-ignore
|
|
||||||
if errorlevel 1 exit /b 1
|
|
||||||
set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%"
|
|
||||||
set "ARGS="
|
|
||||||
)
|
|
||||||
|
|
||||||
set CUDNN_FOLDER=cudnn-windows-x86_64-9.12.0.46_cuda13-archive
|
|
||||||
set CUDNN_LIB_FOLDER="lib"
|
|
||||||
set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip"
|
|
||||||
if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" (
|
|
||||||
curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" & REM @lint-ignore
|
|
||||||
if errorlevel 1 exit /b 1
|
|
||||||
set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%"
|
|
||||||
)
|
|
||||||
|
|
||||||
@REM cuDNN 8.3+ required zlib to be installed on the path
|
|
||||||
echo Installing ZLIB dlls
|
|
||||||
curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip"
|
|
||||||
7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib"
|
|
||||||
xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32"
|
|
||||||
|
|
||||||
goto cuda_common
|
|
||||||
|
|
||||||
:cuda_common
|
:cuda_common
|
||||||
:: NOTE: We only install CUDA if we don't have it installed already.
|
:: NOTE: We only install CUDA if we don't have it installed already.
|
||||||
:: With GHA runners these should be pre-installed as part of our AMI process
|
:: With GHA runners these should be pre-installed as part of our AMI process
|
||||||
|
|||||||
@ -1,9 +1,9 @@
|
|||||||
set WIN_DRIVER_VN=580.88
|
set WIN_DRIVER_VN=528.89
|
||||||
set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe" & REM @lint-ignore
|
set "DRIVER_DOWNLOAD_LINK=https://ossci-windows.s3.amazonaws.com/%WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe" & REM @lint-ignore
|
||||||
curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe
|
curl --retry 3 -kL %DRIVER_DOWNLOAD_LINK% --output %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe
|
||||||
if errorlevel 1 exit /b 1
|
if errorlevel 1 exit /b 1
|
||||||
|
|
||||||
start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe -s -noreboot
|
start /wait %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe -s -noreboot
|
||||||
if errorlevel 1 exit /b 1
|
if errorlevel 1 exit /b 1
|
||||||
|
|
||||||
del %WIN_DRIVER_VN%-data-center-tesla-desktop-win10-win11-64bit-dch-international.exe || ver > NUL
|
del %WIN_DRIVER_VN%-data-center-tesla-desktop-winserver-2016-2019-2022-dch-international.exe || ver > NUL
|
||||||
|
|||||||
@ -1,22 +1,12 @@
|
|||||||
set ADDITIONAL_OPTIONS=""
|
set ADDITIONAL_OPTIONS=""
|
||||||
set PYTHON_EXEC="python"
|
set PYTHON_EXEC="python"
|
||||||
|
|
||||||
|
|
||||||
if "%DESIRED_PYTHON%" == "3.13t" (
|
if "%DESIRED_PYTHON%" == "3.13t" (
|
||||||
echo Python version is set to 3.13t
|
echo Python version is set to 3.13t
|
||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe"
|
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe"
|
||||||
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
||||||
set PYTHON_EXEC="python3.13t"
|
set PYTHON_EXEC="python3.13t"
|
||||||
) else if "%DESIRED_PYTHON%"=="3.14" (
|
|
||||||
echo Python version is set to 3.14 or 3.14t
|
|
||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
|
|
||||||
) else if "%DESIRED_PYTHON%"=="3.14t" (
|
|
||||||
echo Python version is set to 3.14 or 3.14t
|
|
||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe"
|
|
||||||
set ADDITIONAL_OPTIONS="Include_freethreaded=1"
|
|
||||||
set PYTHON_EXEC="python3.14t"
|
|
||||||
) else (
|
) else (
|
||||||
echo Python version is set to %DESIRED_PYTHON%
|
echo DESIRED_PYTHON not defined, Python version is set to %DESIRED_PYTHON%
|
||||||
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/%DESIRED_PYTHON%.0/python-%DESIRED_PYTHON%.0-amd64.exe" %= @lint-ignore =%
|
set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/%DESIRED_PYTHON%.0/python-%DESIRED_PYTHON%.0-amd64.exe" %= @lint-ignore =%
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@ -13,9 +13,9 @@ if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build"
|
|||||||
:xpu_bundle_install_start
|
:xpu_bundle_install_start
|
||||||
|
|
||||||
set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
|
set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI
|
||||||
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
|
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe
|
||||||
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
|
set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product
|
||||||
set XPU_BUNDLE_VERSION=2025.1.3+5
|
set XPU_BUNDLE_VERSION=2025.0.1+20
|
||||||
set XPU_BUNDLE_INSTALLED=0
|
set XPU_BUNDLE_INSTALLED=0
|
||||||
set XPU_BUNDLE_UNINSTALL=0
|
set XPU_BUNDLE_UNINSTALL=0
|
||||||
set XPU_EXTRA_URL=NULL
|
set XPU_EXTRA_URL=NULL
|
||||||
@ -24,9 +24,9 @@ set XPU_EXTRA_VERSION=2025.0.1+1226
|
|||||||
set XPU_EXTRA_INSTALLED=0
|
set XPU_EXTRA_INSTALLED=0
|
||||||
set XPU_EXTRA_UNINSTALL=0
|
set XPU_EXTRA_UNINSTALL=0
|
||||||
|
|
||||||
if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.2] (
|
if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.1] (
|
||||||
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe
|
set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe
|
||||||
set XPU_BUNDLE_VERSION=2025.2.1+20
|
set XPU_BUNDLE_VERSION=2025.1.3+5
|
||||||
)
|
)
|
||||||
|
|
||||||
:: Check if XPU bundle is target version or already installed
|
:: Check if XPU bundle is target version or already installed
|
||||||
@ -90,3 +90,14 @@ if errorlevel 1 exit /b 1
|
|||||||
del xpu_extra.exe
|
del xpu_extra.exe
|
||||||
|
|
||||||
:xpu_install_end
|
:xpu_install_end
|
||||||
|
|
||||||
|
if not "%XPU_ENABLE_KINETO%"=="1" goto install_end
|
||||||
|
:: Install Level Zero SDK
|
||||||
|
set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip
|
||||||
|
curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip"
|
||||||
|
echo "Installing level zero SDK..."
|
||||||
|
7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero"
|
||||||
|
set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%"
|
||||||
|
del "%SRC_DIR%\temp_build\level_zero_sdk.zip"
|
||||||
|
|
||||||
|
:install_end
|
||||||
|
|||||||
@ -7,8 +7,6 @@ call "internal\install_python.bat"
|
|||||||
|
|
||||||
%PYTHON_EXEC% --version
|
%PYTHON_EXEC% --version
|
||||||
set "PATH=%CD%\Python\Lib\site-packages\cmake\data\bin;%CD%\Python\Scripts;%CD%\Python;%PATH%"
|
set "PATH=%CD%\Python\Lib\site-packages\cmake\data\bin;%CD%\Python\Scripts;%CD%\Python;%PATH%"
|
||||||
if "%DESIRED_PYTHON%" == "3.14t" %PYTHON_EXEC% -m pip install numpy==2.3.2 cmake
|
|
||||||
if "%DESIRED_PYTHON%" == "3.14" %PYTHON_EXEC% -m pip install numpy==2.3.2 cmake
|
|
||||||
if "%DESIRED_PYTHON%" == "3.13t" %PYTHON_EXEC% -m pip install numpy==2.2.1 cmake
|
if "%DESIRED_PYTHON%" == "3.13t" %PYTHON_EXEC% -m pip install numpy==2.2.1 cmake
|
||||||
if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install numpy==2.1.2 cmake
|
if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install numpy==2.1.2 cmake
|
||||||
if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake
|
if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake
|
||||||
|
|||||||
@ -124,31 +124,20 @@ popd
|
|||||||
|
|
||||||
export TH_BINARY_BUILD=1
|
export TH_BINARY_BUILD=1
|
||||||
export INSTALL_TEST=0 # dont install test binaries into site-packages
|
export INSTALL_TEST=0 # dont install test binaries into site-packages
|
||||||
export MACOSX_DEPLOYMENT_TARGET=11.0
|
export MACOSX_DEPLOYMENT_TARGET=10.15
|
||||||
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
|
export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
|
||||||
|
|
||||||
|
SETUPTOOLS_PINNED_VERSION="==70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION="=5.3"
|
||||||
EXTRA_CONDA_INSTALL_FLAGS=""
|
EXTRA_CONDA_INSTALL_FLAGS=""
|
||||||
CONDA_ENV_CREATE_FLAGS=""
|
CONDA_ENV_CREATE_FLAGS=""
|
||||||
RENAME_WHEEL=true
|
RENAME_WHEEL=true
|
||||||
case $desired_python in
|
case $desired_python in
|
||||||
3.14t)
|
|
||||||
echo "Using 3.14 deps"
|
|
||||||
NUMPY_PINNED_VERSION="==2.1.0"
|
|
||||||
CONDA_ENV_CREATE_FLAGS="python-freethreading"
|
|
||||||
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge"
|
|
||||||
desired_python="3.14.0rc1"
|
|
||||||
RENAME_WHEEL=false
|
|
||||||
;;
|
|
||||||
3.14)
|
|
||||||
echo "Using 3.14t deps"
|
|
||||||
NUMPY_PINNED_VERSION="==2.1.0"
|
|
||||||
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge"
|
|
||||||
desired_python="3.14.0rc1"
|
|
||||||
RENAME_WHEEL=false
|
|
||||||
;;
|
|
||||||
3.13t)
|
3.13t)
|
||||||
echo "Using 3.13 deps"
|
echo "Using 3.13 deps"
|
||||||
NUMPY_PINNED_VERSION="==2.1.0"
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=6.0.1"
|
||||||
|
NUMPY_PINNED_VERSION="=2.1.0"
|
||||||
CONDA_ENV_CREATE_FLAGS="python-freethreading"
|
CONDA_ENV_CREATE_FLAGS="python-freethreading"
|
||||||
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge"
|
EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge"
|
||||||
desired_python="3.13"
|
desired_python="3.13"
|
||||||
@ -156,23 +145,37 @@ case $desired_python in
|
|||||||
;;
|
;;
|
||||||
3.13)
|
3.13)
|
||||||
echo "Using 3.13 deps"
|
echo "Using 3.13 deps"
|
||||||
NUMPY_PINNED_VERSION="==2.1.0"
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=6.0.1"
|
||||||
|
NUMPY_PINNED_VERSION="=2.1.0"
|
||||||
;;
|
;;
|
||||||
3.12)
|
3.12)
|
||||||
echo "Using 3.12 deps"
|
echo "Using 3.12 deps"
|
||||||
NUMPY_PINNED_VERSION="==2.0.2"
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=6.0.1"
|
||||||
|
NUMPY_PINNED_VERSION="=2.0.2"
|
||||||
;;
|
;;
|
||||||
3.11)
|
3.11)
|
||||||
echo "Using 3.11 deps"
|
echo "Using 3.11 deps"
|
||||||
NUMPY_PINNED_VERSION="==2.0.2"
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=5.3"
|
||||||
|
NUMPY_PINNED_VERSION="=2.0.2"
|
||||||
;;
|
;;
|
||||||
3.10)
|
3.10)
|
||||||
echo "Using 3.10 deps"
|
echo "Using 3.10 deps"
|
||||||
NUMPY_PINNED_VERSION="==2.0.2"
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=5.3"
|
||||||
|
NUMPY_PINNED_VERSION="=2.0.2"
|
||||||
|
;;
|
||||||
|
3.9)
|
||||||
|
echo "Using 3.9 deps"
|
||||||
|
SETUPTOOLS_PINNED_VERSION=">=70.1.0"
|
||||||
|
PYYAML_PINNED_VERSION=">=5.3"
|
||||||
|
NUMPY_PINNED_VERSION="=2.0.2"
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
echo "Unsupported version $desired_python"
|
echo "Using default deps"
|
||||||
exit 1
|
NUMPY_PINNED_VERSION="=1.11.3"
|
||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
@ -181,17 +184,17 @@ tmp_env_name="wheel_py$python_nodot"
|
|||||||
conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS}
|
conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS}
|
||||||
source activate "$tmp_env_name"
|
source activate "$tmp_env_name"
|
||||||
|
|
||||||
PINNED_PACKAGES=(
|
retry pip install -r "${pytorch_rootdir}/requirements-build.txt"
|
||||||
"numpy${NUMPY_PINNED_VERSION}"
|
pip install "numpy=${NUMPY_PINNED_VERSION}" "pyyaml${PYYAML_PINNED_VERSION}" requests ninja "setuptools${SETUPTOOLS_PINNED_VERSION}" typing-extensions
|
||||||
)
|
|
||||||
retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements-build.txt"
|
|
||||||
pip install requests ninja typing-extensions
|
|
||||||
retry pip install -r "${pytorch_rootdir}/requirements.txt" || true
|
retry pip install -r "${pytorch_rootdir}/requirements.txt" || true
|
||||||
retry brew install libomp
|
retry brew install libomp
|
||||||
|
|
||||||
# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule
|
# For USE_DISTRIBUTED=1 on macOS, need libuv, which is build as part of tensorpipe submodule
|
||||||
export USE_DISTRIBUTED=1
|
export USE_DISTRIBUTED=1
|
||||||
|
|
||||||
|
if [[ -n "$CROSS_COMPILE_ARM64" ]]; then
|
||||||
|
export CMAKE_OSX_ARCHITECTURES=arm64
|
||||||
|
fi
|
||||||
export USE_MKLDNN=OFF
|
export USE_MKLDNN=OFF
|
||||||
export USE_QNNPACK=OFF
|
export USE_QNNPACK=OFF
|
||||||
export BUILD_TEST=OFF
|
export BUILD_TEST=OFF
|
||||||
@ -199,7 +202,16 @@ export BUILD_TEST=OFF
|
|||||||
pushd "$pytorch_rootdir"
|
pushd "$pytorch_rootdir"
|
||||||
echo "Calling setup.py bdist_wheel at $(date)"
|
echo "Calling setup.py bdist_wheel at $(date)"
|
||||||
|
|
||||||
python setup.py bdist_wheel -d "$whl_tmp_dir" --plat-name ${mac_version}
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
echo "Calling setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||||
|
BUILD_LIBTORCH_WHL=1 BUILD_PYTHON_ONLY=0 python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||||
|
echo "Finished setup.py bdist_wheel for split build (BUILD_LIBTORCH_WHL)"
|
||||||
|
echo "Calling setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||||
|
BUILD_LIBTORCH_WHL=0 BUILD_PYTHON_ONLY=1 CMAKE_FRESH=1 python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||||
|
echo "Finished setup.py bdist_wheel for split build (BUILD_PYTHON_ONLY)"
|
||||||
|
else
|
||||||
|
python setup.py bdist_wheel -d "$whl_tmp_dir"
|
||||||
|
fi
|
||||||
|
|
||||||
echo "Finished setup.py bdist_wheel at $(date)"
|
echo "Finished setup.py bdist_wheel at $(date)"
|
||||||
|
|
||||||
|
|||||||
@ -65,8 +65,16 @@ fi
|
|||||||
|
|
||||||
if [[ "$PACKAGE_TYPE" != libtorch ]]; then
|
if [[ "$PACKAGE_TYPE" != libtorch ]]; then
|
||||||
if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
|
||||||
|
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
|
||||||
|
pkg_no_python="$(ls -1 /final_pkgs/torch_no_python* | sort |tail -1)"
|
||||||
|
pkg_torch="$(ls -1 /final_pkgs/torch-* | sort |tail -1)"
|
||||||
|
# todo: after folder is populated use the pypi_pkg channel instead
|
||||||
|
pip install "\$pkg_no_python" "\$pkg_torch" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}_pypi_pkg"
|
||||||
|
retry pip install -q numpy protobuf typing-extensions
|
||||||
|
else
|
||||||
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
|
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
|
||||||
retry pip install -q numpy protobuf typing-extensions
|
retry pip install -q numpy protobuf typing-extensions
|
||||||
|
fi
|
||||||
else
|
else
|
||||||
pip install "\$pkg"
|
pip install "\$pkg"
|
||||||
retry pip install -q numpy protobuf typing-extensions
|
retry pip install -q numpy protobuf typing-extensions
|
||||||
|
|||||||
@ -75,8 +75,8 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt)
|
|||||||
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
|
# Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for the all the wheel builds hence append TRITON_CONSTRAINT
|
||||||
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'"
|
||||||
|
|
||||||
# CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries.
|
# CUDA 12.9 builds have triton for Linux and Linux aarch64 binaries.
|
||||||
if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then
|
if [[ "$DESIRED_CUDA" == "cu129" ]]; then
|
||||||
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
TRITON_CONSTRAINT="platform_system == 'Linux'"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
@ -134,6 +134,7 @@ export DESIRED_PYTHON="${DESIRED_PYTHON:-}"
|
|||||||
export DESIRED_CUDA="$DESIRED_CUDA"
|
export DESIRED_CUDA="$DESIRED_CUDA"
|
||||||
export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
|
export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
|
||||||
export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
|
export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
|
||||||
|
export USE_SPLIT_BUILD="${USE_SPLIT_BUILD:-}"
|
||||||
if [[ "${OSTYPE}" == "msys" ]]; then
|
if [[ "${OSTYPE}" == "msys" ]]; then
|
||||||
export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
|
export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
|
||||||
if [[ "${LIBTORCH_CONFIG:-}" == 'debug' ]]; then
|
if [[ "${LIBTORCH_CONFIG:-}" == 'debug' ]]; then
|
||||||
|
|||||||
@ -23,6 +23,10 @@ if [[ "${DRY_RUN}" = "disabled" ]]; then
|
|||||||
AWS_S3_CP="aws s3 cp"
|
AWS_S3_CP="aws s3 cp"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
if [[ "${USE_SPLIT_BUILD:-false}" == "true" ]]; then
|
||||||
|
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
|
||||||
|
fi
|
||||||
|
|
||||||
# this is special build with all dependencies packaged
|
# this is special build with all dependencies packaged
|
||||||
if [[ ${BUILD_NAME} == *-full* ]]; then
|
if [[ ${BUILD_NAME} == *-full* ]]; then
|
||||||
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
|
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_full"
|
||||||
@ -51,12 +55,16 @@ s3_upload() {
|
|||||||
s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
|
s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/"
|
||||||
fi
|
fi
|
||||||
(
|
(
|
||||||
|
cache_control_flag=""
|
||||||
|
if [[ "${UPLOAD_CHANNEL}" = "test" ]]; then
|
||||||
|
cache_control_flag="--cache-control='no-cache,no-store,must-revalidate'"
|
||||||
|
fi
|
||||||
for pkg in ${PKG_DIR}/*.${extension}; do
|
for pkg in ${PKG_DIR}/*.${extension}; do
|
||||||
(
|
(
|
||||||
set -x
|
set -x
|
||||||
shm_id=$(sha256sum "${pkg}" | awk '{print $1}')
|
shm_id=$(sha256sum "${pkg}" | awk '{print $1}')
|
||||||
${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \
|
${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \
|
||||||
--metadata "checksum-sha256=${shm_id}"
|
--metadata "checksum-sha256=${shm_id}" ${cache_control_flag}
|
||||||
)
|
)
|
||||||
done
|
done
|
||||||
)
|
)
|
||||||
|
|||||||
@ -15,7 +15,8 @@ fi
|
|||||||
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
|
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
|
||||||
export VC_YEAR=2022
|
export VC_YEAR=2022
|
||||||
export USE_SCCACHE=0
|
export USE_SCCACHE=0
|
||||||
export XPU_VERSION=2025.2
|
export XPU_VERSION=2025.1
|
||||||
|
export XPU_ENABLE_KINETO=1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "Free space on filesystem before build:"
|
echo "Free space on filesystem before build:"
|
||||||
|
|||||||
@ -8,7 +8,7 @@ export VC_YEAR=2022
|
|||||||
|
|
||||||
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
|
if [[ "$DESIRED_CUDA" == 'xpu' ]]; then
|
||||||
export VC_YEAR=2022
|
export VC_YEAR=2022
|
||||||
export XPU_VERSION=2025.2
|
export XPU_VERSION=2025.1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
pushd "$PYTORCH_ROOT/.ci/pytorch/"
|
pushd "$PYTORCH_ROOT/.ci/pytorch/"
|
||||||
|
|||||||
1
.flake8
1
.flake8
@ -48,7 +48,6 @@ per-file-ignores =
|
|||||||
torch/__init__.py: F401,TOR901
|
torch/__init__.py: F401,TOR901
|
||||||
torch/_custom_op/impl.py: TOR901
|
torch/_custom_op/impl.py: TOR901
|
||||||
torch/_export/serde/upgrade.py: TOR901
|
torch/_export/serde/upgrade.py: TOR901
|
||||||
torch/_functorch/predispatch.py: TOR901
|
|
||||||
torch/_functorch/vmap.py: TOR901
|
torch/_functorch/vmap.py: TOR901
|
||||||
torch/_inductor/test_operators.py: TOR901
|
torch/_inductor/test_operators.py: TOR901
|
||||||
torch/_library/abstract_impl.py: TOR901
|
torch/_library/abstract_impl.py: TOR901
|
||||||
|
|||||||
3
.github/actionlint.yaml
vendored
3
.github/actionlint.yaml
vendored
@ -12,9 +12,7 @@ self-hosted-runner:
|
|||||||
- linux.9xlarge.ephemeral
|
- linux.9xlarge.ephemeral
|
||||||
- am2.linux.9xlarge.ephemeral
|
- am2.linux.9xlarge.ephemeral
|
||||||
- linux.12xlarge
|
- linux.12xlarge
|
||||||
- linux.12xlarge.memory
|
|
||||||
- linux.24xlarge
|
- linux.24xlarge
|
||||||
- linux.24xlarge.memory
|
|
||||||
- linux.24xlarge.ephemeral
|
- linux.24xlarge.ephemeral
|
||||||
- linux.24xlarge.amd
|
- linux.24xlarge.amd
|
||||||
- linux.arm64.2xlarge
|
- linux.arm64.2xlarge
|
||||||
@ -56,7 +54,6 @@ self-hosted-runner:
|
|||||||
- linux.rocm.gpu.2
|
- linux.rocm.gpu.2
|
||||||
- linux.rocm.gpu.4
|
- linux.rocm.gpu.4
|
||||||
# gfx942 runners
|
# gfx942 runners
|
||||||
- linux.rocm.gpu.gfx942.1
|
|
||||||
- linux.rocm.gpu.gfx942.2
|
- linux.rocm.gpu.gfx942.2
|
||||||
- linux.rocm.gpu.gfx942.4
|
- linux.rocm.gpu.gfx942.4
|
||||||
- rocm-docker
|
- rocm-docker
|
||||||
|
|||||||
@ -1,86 +0,0 @@
|
|||||||
# .github/workflows/build-external.yml
|
|
||||||
name: Build External packages
|
|
||||||
|
|
||||||
description: build external packages for PyTorch
|
|
||||||
|
|
||||||
inputs:
|
|
||||||
cuda-version:
|
|
||||||
description: CUDA version to use
|
|
||||||
type: string
|
|
||||||
required: true
|
|
||||||
default: '12.8.1'
|
|
||||||
cuda-arch-list:
|
|
||||||
description: TORCH_CUDA_ARCH_LIST (e.g., "8.0;8.9;9.0")
|
|
||||||
type: string
|
|
||||||
required: true
|
|
||||||
default: ""
|
|
||||||
docker-image:
|
|
||||||
description: Base image to use
|
|
||||||
type: string
|
|
||||||
required: true
|
|
||||||
build-targets:
|
|
||||||
description: Build targets
|
|
||||||
type: string
|
|
||||||
required: true
|
|
||||||
torch-wheel-dir:
|
|
||||||
description: Directory to built torch wheel
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
default: dist
|
|
||||||
output-dir:
|
|
||||||
description: Directory to store build artifact
|
|
||||||
default: external
|
|
||||||
type: string
|
|
||||||
required: false
|
|
||||||
|
|
||||||
outputs:
|
|
||||||
build_time:
|
|
||||||
description: "Total build time in seconds"
|
|
||||||
value: ${{ steps.build-external.outputs.build_time }}
|
|
||||||
output_dir:
|
|
||||||
description: "Directory where build artifact is stored"
|
|
||||||
value: ${{ steps.build-external.outputs.output_dir }}
|
|
||||||
|
|
||||||
runs:
|
|
||||||
using: composite
|
|
||||||
steps:
|
|
||||||
- name: Build external packages in sequence
|
|
||||||
id: build-external
|
|
||||||
env:
|
|
||||||
SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
|
|
||||||
SCCACHE_REGION: us-east-1
|
|
||||||
CUDA_VERSION: ${{ inputs.cuda-version }}
|
|
||||||
TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
|
|
||||||
BASE_IMAGE: ${{ inputs.docker-image }}
|
|
||||||
BUILD_TARGETS: ${{ inputs.build-targets }}
|
|
||||||
PARENT_OUTPUT_DIR: ${{ inputs.output-dir }}
|
|
||||||
TORCH_WHEELS_PATH: ${{ inputs.torch-wheel-dir }}
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
set -euo pipefail
|
|
||||||
python3 --version
|
|
||||||
docker images
|
|
||||||
START_TIME=$(date +%s)
|
|
||||||
(
|
|
||||||
cd .ci/lumen_cli
|
|
||||||
python3 -m pip install -e .
|
|
||||||
)
|
|
||||||
MAX_JOBS="$(nproc --ignore=6)"
|
|
||||||
export MAX_JOBS
|
|
||||||
|
|
||||||
# Split the comma-separated list and build each target
|
|
||||||
IFS=',' read -ra TARGETS <<< "$BUILD_TARGETS"
|
|
||||||
for target in "${TARGETS[@]}"; do
|
|
||||||
OUTPUT_DIR="$PARENT_OUTPUT_DIR/$target"
|
|
||||||
export OUTPUT_DIR
|
|
||||||
echo "Building external package: $target in directory $OUTPUT_DIR"
|
|
||||||
python3 -m cli.run build external "$target"
|
|
||||||
done
|
|
||||||
|
|
||||||
END_TIME=$(date +%s)
|
|
||||||
{
|
|
||||||
echo "build_time=$((END_TIME - START_TIME))"
|
|
||||||
if [ -d "$PARENT_OUTPUT_DIR" ]; then
|
|
||||||
echo "output_dir=$PARENT_OUTPUT_DIR"
|
|
||||||
fi
|
|
||||||
} >> "$GITHUB_OUTPUT"
|
|
||||||
15
.github/actions/checkout-pytorch/action.yml
vendored
15
.github/actions/checkout-pytorch/action.yml
vendored
@ -57,21 +57,6 @@ runs:
|
|||||||
submodules: ${{ inputs.submodules }}
|
submodules: ${{ inputs.submodules }}
|
||||||
show-progress: false
|
show-progress: false
|
||||||
|
|
||||||
- name: Clean submodules post checkout
|
|
||||||
id: clean-submodules
|
|
||||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}
|
|
||||||
shell: bash
|
|
||||||
env:
|
|
||||||
NO_SUDO: ${{ inputs.no-sudo }}
|
|
||||||
run: |
|
|
||||||
cd "${GITHUB_WORKSPACE}"
|
|
||||||
# Clean stale submodule dirs
|
|
||||||
if [ -z "${NO_SUDO}" ]; then
|
|
||||||
sudo git submodule foreach --recursive git clean -ffdx
|
|
||||||
else
|
|
||||||
git submodule foreach --recursive git clean -ffdx
|
|
||||||
fi
|
|
||||||
|
|
||||||
- name: Clean workspace (try again)
|
- name: Clean workspace (try again)
|
||||||
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
|
if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' &&
|
||||||
(steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
|
(steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user