mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-31 12:15:03 +08:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			module-shi
			...
			ngimel/err
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 178d6debde | |||
| 14a801ffb6 | 
| @ -1,15 +0,0 @@ | ||||
| version: 1 | ||||
| paths: | ||||
| include: | ||||
|   - "**/*.py" | ||||
| exclude: | ||||
|   - ".*" | ||||
|   - ".*/**" | ||||
|   - "**/.*/**" | ||||
|   - "**/.*" | ||||
|   - "**/_*/**" | ||||
|   - "**/_*.py" | ||||
|   - "**/test/**" | ||||
|   - "**/benchmarks/**" | ||||
|   - "**/test_*.py" | ||||
|   - "**/*_test.py" | ||||
| @ -7,15 +7,6 @@ if [[ "$GPU_ARCH_VERSION" == *"12.9"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0" | ||||
| fi | ||||
|  | ||||
| if [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then | ||||
|     export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0" | ||||
| fi | ||||
|  | ||||
| # Compress the fatbin with -compress-mode=size for CUDA 13 | ||||
| if [[ "$DESIRED_CUDA" == *"13"* ]]; then | ||||
|     export TORCH_NVCC_FLAGS="-compress-mode=size" | ||||
| fi | ||||
|  | ||||
| SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )" | ||||
| source $SCRIPTPATH/aarch64_ci_setup.sh | ||||
|  | ||||
|  | ||||
| @ -77,24 +77,21 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|     wheelname = os.path.basename(wheel_path) | ||||
|     os.mkdir(f"{folder}/tmp") | ||||
|     os.system(f"unzip {wheel_path} -d {folder}/tmp") | ||||
|     # Common libraries for all CUDA versions | ||||
|     common_libs = [ | ||||
|         # Non-NVIDIA system libraries | ||||
|         "/lib64/libgomp.so.1", | ||||
|         "/usr/lib64/libgfortran.so.5", | ||||
|         "/acl/build/libarm_compute.so", | ||||
|         "/acl/build/libarm_compute_graph.so", | ||||
|         # Common CUDA libraries (same for all versions) | ||||
|         "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|     libs_to_copy = [ | ||||
|         "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||||
|         "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so", | ||||
|         "/usr/local/cuda/lib64/libcudnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcublas.so.12", | ||||
|         "/usr/local/cuda/lib64/libcublasLt.so.12", | ||||
|         "/usr/local/cuda/lib64/libcudart.so.12", | ||||
|         "/usr/local/cuda/lib64/libcufft.so.11", | ||||
|         "/usr/local/cuda/lib64/libcusparse.so.12", | ||||
|         "/usr/local/cuda/lib64/libcusparseLt.so.0", | ||||
|         "/usr/local/cuda/lib64/libcusolver.so.11", | ||||
|         "/usr/local/cuda/lib64/libcurand.so.10", | ||||
|         "/usr/local/cuda/lib64/libnccl.so.2", | ||||
|         "/usr/local/cuda/lib64/libnvshmem_host.so.3", | ||||
|         "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|         "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|         "/usr/local/cuda/lib64/libcudnn_adv.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_cnn.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_graph.so.9", | ||||
| @ -102,41 +99,22 @@ def package_cuda_wheel(wheel_path, desired_cuda) -> None: | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", | ||||
|         "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", | ||||
|         "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|         "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|         "/usr/local/cuda/lib64/libcusparse.so.12", | ||||
|         "/lib64/libgomp.so.1", | ||||
|         "/usr/lib64/libgfortran.so.5", | ||||
|         "/acl/build/libarm_compute.so", | ||||
|         "/acl/build/libarm_compute_graph.so", | ||||
|         "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", | ||||
|         "/usr/local/lib/libnvpl_lapack_core.so.0", | ||||
|         "/usr/local/lib/libnvpl_blas_core.so.0", | ||||
|     ] | ||||
|  | ||||
|     # CUDA version-specific libraries | ||||
|     if "130" in desired_cuda: | ||||
|         version_specific_libs = [ | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13", | ||||
|             "/usr/local/cuda/lib64/libcublas.so.13", | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.13", | ||||
|             "/usr/local/cuda/lib64/libcudart.so.13", | ||||
|             "/usr/local/cuda/lib64/libcufft.so.12", | ||||
|             "/usr/local/cuda/lib64/libcusolver.so.12", | ||||
|             "/usr/local/cuda/lib64/libnvJitLink.so.13", | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.13", | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so.13.0", | ||||
|     if "129" in desired_cuda: | ||||
|         libs_to_copy += [ | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so.12.9", | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0", | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1", | ||||
|         ] | ||||
|     elif "12" in desired_cuda: | ||||
|         # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9") | ||||
|         minor_version = desired_cuda[-1] | ||||
|         version_specific_libs = [ | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", | ||||
|             "/usr/local/cuda/lib64/libcublas.so.12", | ||||
|             "/usr/local/cuda/lib64/libcublasLt.so.12", | ||||
|             "/usr/local/cuda/lib64/libcudart.so.12", | ||||
|             "/usr/local/cuda/lib64/libcufft.so.11", | ||||
|             "/usr/local/cuda/lib64/libcusolver.so.11", | ||||
|             "/usr/local/cuda/lib64/libnvJitLink.so.12", | ||||
|             "/usr/local/cuda/lib64/libnvrtc.so.12", | ||||
|             f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}", | ||||
|         ] | ||||
|  | ||||
|     # Combine all libraries | ||||
|     libs_to_copy = common_libs + version_specific_libs | ||||
|  | ||||
|     # Copy libraries to unzipped_folder/a/lib | ||||
|     for lib_path in libs_to_copy: | ||||
| @ -231,6 +209,8 @@ if __name__ == "__main__": | ||||
|     # MAX_JOB=5 is not required for CPU backend (see commit 465d98b) | ||||
|     if enable_cuda: | ||||
|         build_vars += "MAX_JOBS=5 " | ||||
|         # nvshmem is broken for aarch64 see https://github.com/pytorch/pytorch/issues/160425 | ||||
|         build_vars += "USE_NVSHMEM=OFF " | ||||
|  | ||||
|     override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") | ||||
|     desired_cuda = os.getenv("DESIRED_CUDA") | ||||
|  | ||||
| @ -120,8 +120,8 @@ If your new Docker image needs a library installed from a specific pinned commit | ||||
|    If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`: | ||||
|    ```bash | ||||
|    docker build \ | ||||
|      .... | ||||
|      --build-arg "NEW_ARG_1=${NEW_ARG_1}" | ||||
|       .... | ||||
|       --build-arg "NEW_ARG_1=${NEW_ARG_1}" | ||||
|    ``` | ||||
|  | ||||
| 3. **Update Dockerfile logic**: | ||||
|  | ||||
| @ -64,10 +64,6 @@ FROM cuda as cuda12.9 | ||||
| RUN bash ./install_cuda.sh 12.9 | ||||
| ENV DESIRED_CUDA=12.9 | ||||
|  | ||||
| FROM cuda as cuda13.0 | ||||
| RUN bash ./install_cuda.sh 13.0 | ||||
| ENV DESIRED_CUDA=13.0 | ||||
|  | ||||
| FROM ${ROCM_IMAGE} as rocm | ||||
| ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201" | ||||
| ADD ./common/install_mkl.sh install_mkl.sh | ||||
| @ -80,10 +76,10 @@ ADD ./common/install_mnist.sh install_mnist.sh | ||||
| RUN bash ./install_mnist.sh | ||||
|  | ||||
| FROM base as all_cuda | ||||
| COPY --from=cuda11.8  /usr/local/cuda-11.8 /usr/local/cuda-11.8 | ||||
| COPY --from=cuda12.6  /usr/local/cuda-12.6 /usr/local/cuda-12.6 | ||||
| COPY --from=cuda12.8  /usr/local/cuda-12.8 /usr/local/cuda-12.8 | ||||
| COPY --from=cuda12.9  /usr/local/cuda-12.9 /usr/local/cuda-12.9 | ||||
| COPY --from=cuda13.0  /usr/local/cuda-13.0 /usr/local/cuda-13.0 | ||||
|  | ||||
| # Final step | ||||
| FROM ${BASE_TARGET} as final | ||||
|  | ||||
| @ -76,13 +76,10 @@ elif [[ "$image" == *cuda*linter* ]]; then | ||||
| elif [[ "$image" == *linter* ]]; then | ||||
|   # Use a separate Dockerfile for linter to keep a small image size | ||||
|   DOCKERFILE="linter/Dockerfile" | ||||
| elif [[ "$image" == *riscv* ]]; then | ||||
|   # Use RISC-V specific Dockerfile | ||||
|   DOCKERFILE="ubuntu-cross-riscv/Dockerfile" | ||||
| fi | ||||
|  | ||||
| _UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152 | ||||
| _UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96 | ||||
| _UCX_COMMIT=7bb2722ff2187a0cad557ae4a6afa090569f83fb | ||||
| _UCC_COMMIT=20eae37090a4ce1b32bcce6144ccad0b49943e0b | ||||
| if [[ "$image" == *rocm* ]]; then | ||||
|   _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6 | ||||
|   _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d | ||||
| @ -114,19 +111,31 @@ case "$tag" in | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11) | ||||
|     CUDA_VERSION=13.0.0 | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.8.1 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     GCC_VERSION=9 | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks) | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.8.1 | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     ANACONDA_PYTHON_VERSION=3.12 | ||||
|     GCC_VERSION=9 | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
|     UCX_COMMIT=${_UCX_COMMIT} | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     TRITON=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3.13-gcc9-inductor-benchmarks) | ||||
|     CUDA_VERSION=12.8.1 | ||||
|     ANACONDA_PYTHON_VERSION=3.13 | ||||
|     GCC_VERSION=9 | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
| @ -156,13 +165,13 @@ case "$tag" in | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-clang12-onnx) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=12 | ||||
|     VISION=yes | ||||
|     ONNX=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.10-clang12) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|   pytorch-linux-jammy-py3.9-clang12) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CLANG_VERSION=12 | ||||
|     VISION=yes | ||||
|     TRITON=yes | ||||
| @ -197,24 +206,23 @@ case "$tag" in | ||||
|     UCC_COMMIT=${_UCC_COMMIT} | ||||
|     PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950" | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-n-1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|   pytorch-linux-jammy-xpu-2025.0-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.0 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-2025.1-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.1 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-xpu-n-py3) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     XPU_VERSION=2025.2 | ||||
|     NINJA_VERSION=1.9.0 | ||||
|     TRITON=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3-gcc11-inductor-benchmarks) | ||||
|     # TODO (huydhn): Upgrade this to Python >= 3.10 | ||||
|   pytorch-linux-jammy-py3.9-gcc11-inductor-benchmarks) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
| @ -223,8 +231,8 @@ case "$tag" in | ||||
|     DOCS=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|   pytorch-linux-jammy-cuda12.8-cudnn9-py3.9-clang12) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     CUDA_VERSION=12.8.1 | ||||
|     CLANG_VERSION=12 | ||||
|     VISION=yes | ||||
| @ -235,8 +243,8 @@ case "$tag" in | ||||
|     CLANG_VERSION=18 | ||||
|     VISION=yes | ||||
|     ;; | ||||
|   pytorch-linux-jammy-py3.10-gcc11) | ||||
|     ANACONDA_PYTHON_VERSION=3.10 | ||||
|   pytorch-linux-jammy-py3.9-gcc11) | ||||
|     ANACONDA_PYTHON_VERSION=3.9 | ||||
|     GCC_VERSION=11 | ||||
|     VISION=yes | ||||
|     KATEX=yes | ||||
| @ -277,6 +285,7 @@ case "$tag" in | ||||
|     GCC_VERSION=11 | ||||
|     ACL=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     OPENBLAS=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
| @ -287,15 +296,13 @@ case "$tag" in | ||||
|     GCC_VERSION=11 | ||||
|     ACL=yes | ||||
|     VISION=yes | ||||
|     CONDA_CMAKE=yes | ||||
|     OPENBLAS=yes | ||||
|     # snadampal: skipping llvm src build install because the current version | ||||
|     # from pytorch/llvm:9.0.1 is x86 specific | ||||
|     SKIP_LLVM_SRC_BUILD_INSTALL=yes | ||||
|     INDUCTOR_BENCHMARKS=yes | ||||
|     ;; | ||||
|   pytorch-linux-noble-riscv64-py3.12-gcc14) | ||||
|     GCC_VERSION=14 | ||||
|     ;; | ||||
|   *) | ||||
|     # Catch-all for builds that are not hardcoded. | ||||
|     VISION=yes | ||||
| @ -416,14 +423,7 @@ if [ -n "$ANACONDA_PYTHON_VERSION" ]; then | ||||
| fi | ||||
|  | ||||
| if [ -n "$GCC_VERSION" ]; then | ||||
|   if [[ "$image" == *riscv* ]]; then | ||||
|     # Check RISC-V cross-compilation toolchain version | ||||
|     if !(drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version 2>&1 | grep -q " $GCC_VERSION\\W"); then | ||||
|       echo "RISC-V GCC_VERSION=$GCC_VERSION, but:" | ||||
|       drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version | ||||
|       exit 1 | ||||
|     fi | ||||
|   elif !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then | ||||
|   if !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then | ||||
|     echo "GCC_VERSION=$GCC_VERSION, but:" | ||||
|     drun gcc --version | ||||
|     exit 1 | ||||
|  | ||||
| @ -1,2 +0,0 @@ | ||||
| transformers==4.54.0 | ||||
| soxr==0.5.0 | ||||
							
								
								
									
										1
									
								
								.ci/docker/ci_commit_pins/huggingface.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								.ci/docker/ci_commit_pins/huggingface.txt
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1 @@ | ||||
| v4.54.0 | ||||
| @ -1 +0,0 @@ | ||||
| v2.27.7-1 | ||||
| @ -1 +1 @@ | ||||
| 74a23feff57432129df84d8099e622773cf77925 | ||||
| e03a63be43e33596f7f0a43b0f530353785e4a59 | ||||
|  | ||||
| @ -1 +1 @@ | ||||
| d0e80f39c562c70986fc548fa6e5852ad86e16e7 | ||||
| ae324eeac8e102a2b40370e341460f3791353398 | ||||
|  | ||||
| @ -83,9 +83,9 @@ function build_cpython { | ||||
|         py_suffix=${py_ver::-1} | ||||
|         py_folder=$py_suffix | ||||
|     fi | ||||
|     # Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4 | ||||
|     # Only b3 is available now | ||||
|     if [ "$py_suffix" == "3.14.0" ]; then | ||||
|         py_suffix="3.14.0rc2" | ||||
|         py_suffix="3.14.0b3" | ||||
|     fi | ||||
|     wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz | ||||
|     do_cpython_build $py_ver Python-$py_suffix | ||||
|  | ||||
| @ -10,7 +10,7 @@ else | ||||
|   arch_path='sbsa' | ||||
| fi | ||||
|  | ||||
| NVSHMEM_VERSION=3.3.24 | ||||
| NVSHMEM_VERSION=3.3.9 | ||||
|  | ||||
| function install_cuda { | ||||
|   version=$1 | ||||
| @ -62,16 +62,14 @@ function install_nvshmem { | ||||
|   mkdir -p "${tmpdir}" && cd "${tmpdir}" | ||||
|  | ||||
|   # nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html | ||||
|   # This pattern is a lie as it is not consistent across versions, for 3.3.9 it was cuda_ver-arch-nvshhem-ver | ||||
|   filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive" | ||||
|   suffix=".tar.xz" | ||||
|   url="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/linux-${arch_path}/${filename}${suffix}" | ||||
|   filename="libnvshmem_cuda${cuda_major_version}-linux-${arch_path}-${nvshmem_version}" | ||||
|   url="https://developer.download.nvidia.com/compute/redist/nvshmem/${nvshmem_version}/builds/cuda${cuda_major_version}/txz/agnostic/${dl_arch}/${filename}.tar.gz" | ||||
|  | ||||
|   # download, unpack, install | ||||
|   wget -q "${url}" | ||||
|   tar xf "${filename}${suffix}" | ||||
|   cp -a "${filename}/include/"* /usr/local/cuda/include/ | ||||
|   cp -a "${filename}/lib/"*     /usr/local/cuda/lib64/ | ||||
|   tar xf "${filename}.tar.gz" | ||||
|   cp -a "libnvshmem/include/"* /usr/local/cuda/include/ | ||||
|   cp -a "libnvshmem/lib/"*     /usr/local/cuda/lib64/ | ||||
|  | ||||
|   # cleanup | ||||
|   cd .. | ||||
| @ -128,6 +126,74 @@ function install_129 { | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function prune_124 { | ||||
|   echo "Pruning CUDA 12.4" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.4/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.4/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.4 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.4/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.1.0 $CUDA_BASE/nsight-systems-2023.4.4/ | ||||
| } | ||||
|  | ||||
| function prune_126 { | ||||
|   echo "Pruning CUDA 12.6" | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune static libs | ||||
|   ##################################################################################### | ||||
|   export NVPRUNE="/usr/local/cuda-12.6/bin/nvprune" | ||||
|   export CUDA_LIB_DIR="/usr/local/cuda-12.6/lib64" | ||||
|  | ||||
|   export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|   export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" | ||||
|  | ||||
|   if [[ -n "$OVERRIDE_GENCODE" ]]; then | ||||
|       export GENCODE=$OVERRIDE_GENCODE | ||||
|   fi | ||||
|   if [[ -n "$OVERRIDE_GENCODE_CUDNN" ]]; then | ||||
|       export GENCODE_CUDNN=$OVERRIDE_GENCODE_CUDNN | ||||
|   fi | ||||
|  | ||||
|   # all CUDA libs except CuDNN and CuBLAS | ||||
|   ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis"  \ | ||||
|       | xargs -I {} bash -c \ | ||||
|                 "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" | ||||
|  | ||||
|   # prune CuDNN and CuBLAS | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a | ||||
|   $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a | ||||
|  | ||||
|   ##################################################################################### | ||||
|   # CUDA 12.6 prune visual tools | ||||
|   ##################################################################################### | ||||
|   export CUDA_BASE="/usr/local/cuda-12.6/" | ||||
|   rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2024.3.2 $CUDA_BASE/nsight-systems-2024.5.1/ | ||||
| } | ||||
|  | ||||
| function install_128 { | ||||
|   CUDNN_VERSION=9.8.0.87 | ||||
|   echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1" | ||||
| @ -146,38 +212,18 @@ function install_128 { | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| function install_130 { | ||||
|   CUDNN_VERSION=9.12.0.46 | ||||
|   echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1" | ||||
|   # install CUDA 13.0 in the same container | ||||
|   install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux | ||||
|  | ||||
|   # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement | ||||
|   install_cudnn 13 $CUDNN_VERSION | ||||
|  | ||||
|   install_nvshmem 13 $NVSHMEM_VERSION | ||||
|  | ||||
|   CUDA_VERSION=13.0 bash install_nccl.sh | ||||
|  | ||||
|   CUDA_VERSION=13.0 bash install_cusparselt.sh | ||||
|  | ||||
|   ldconfig | ||||
| } | ||||
|  | ||||
| # idiomatic parameter and option handling in sh | ||||
| while test $# -gt 0 | ||||
| do | ||||
|     case "$1" in | ||||
|     12.4) install_124; | ||||
|     12.4) install_124; prune_124 | ||||
|         ;; | ||||
|     12.6|12.6.*) install_126; | ||||
|     12.6|12.6.*) install_126; prune_126 | ||||
|         ;; | ||||
|     12.8|12.8.*) install_128; | ||||
|         ;; | ||||
|     12.9|12.9.*) install_129; | ||||
|         ;; | ||||
|     13.0|13.0.*) install_130; | ||||
|         ;; | ||||
|     *) echo "bad argument $1"; exit 1 | ||||
|         ;; | ||||
|     esac | ||||
|  | ||||
| @ -5,15 +5,7 @@ set -ex | ||||
| # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html | ||||
| mkdir tmp_cusparselt && cd tmp_cusparselt | ||||
|  | ||||
| if [[ ${CUDA_VERSION:0:4} =~ "13" ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
|         arch_path='x86_64' | ||||
|     fi | ||||
|     CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.8.0.4_cuda13-archive" | ||||
|     curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz | ||||
| elif [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then | ||||
| if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then | ||||
|     arch_path='sbsa' | ||||
|     export TARGETARCH=${TARGETARCH:-$(uname -m)} | ||||
|     if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then | ||||
|  | ||||
| @ -5,7 +5,9 @@ set -ex | ||||
| source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh" | ||||
|  | ||||
| function install_huggingface() { | ||||
|   pip_install -r huggingface-requirements.txt | ||||
|   local version | ||||
|   commit=$(get_pinned_commit huggingface) | ||||
|   pip_install "git+https://github.com/huggingface/transformers@${commit}" | ||||
| } | ||||
|  | ||||
| function install_timm() { | ||||
| @ -24,6 +26,9 @@ function install_torchbench() { | ||||
|  | ||||
|   python install.py --continue_on_fail | ||||
|  | ||||
|   # soxr comes from https://github.com/huggingface/transformers/pull/39429 | ||||
|   pip install transformers==4.54.0 soxr==0.5.0 | ||||
|  | ||||
|   echo "Print all dependencies after TorchBench is installed" | ||||
|   python -mpip freeze | ||||
|   popd | ||||
|  | ||||
| @ -7,8 +7,6 @@ if [[ ${CUDA_VERSION:0:2} == "11" ]]; then | ||||
|   NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt) | ||||
| elif [[ ${CUDA_VERSION:0:2} == "12" ]]; then | ||||
|   NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt) | ||||
| elif [[ ${CUDA_VERSION:0:2} == "13" ]]; then | ||||
|   NCCL_VERSION=$(cat ci_commit_pins/nccl-cu13.txt) | ||||
| else | ||||
|   echo "Unexpected CUDA_VERSION ${CUDA_VERSION}" | ||||
|   exit 1 | ||||
|  | ||||
| @ -19,8 +19,8 @@ pip_install \ | ||||
|   transformers==4.36.2 | ||||
|  | ||||
| pip_install coloredlogs packaging | ||||
| pip_install onnxruntime==1.22.1 | ||||
| pip_install onnxscript==0.4.0 | ||||
| pip_install onnxruntime==1.18.1 | ||||
| pip_install onnxscript==0.3.1 | ||||
|  | ||||
| # Cache the transformers model to be used later by ONNX tests. We need to run the transformers | ||||
| # package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/ | ||||
|  | ||||
| @ -57,7 +57,7 @@ if [ ! -f setup.py ]; then | ||||
|   cd python | ||||
| fi | ||||
|  | ||||
| pip_install pybind11==3.0.1 | ||||
| pip_install pybind11==2.13.6 | ||||
|  | ||||
| # TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527 | ||||
| as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py | ||||
|  | ||||
| @ -44,12 +44,8 @@ function install_ucc() { | ||||
|  | ||||
|   ./autogen.sh | ||||
|  | ||||
|   if [[ -n "$CUDA_VERSION"  && $CUDA_VERSION == 13* ]]; then | ||||
|     NVCC_GENCODE="-gencode=arch=compute_86,code=compute_86" | ||||
|   else | ||||
|     # We only run distributed tests on Tesla M60 and A10G | ||||
|     NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86" | ||||
|   fi | ||||
|   # We only run distributed tests on Tesla M60 and A10G | ||||
|   NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86" | ||||
|  | ||||
|   if [[ -n "$ROCM_VERSION" ]]; then | ||||
|     if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then | ||||
|  | ||||
| @ -65,14 +65,10 @@ function install_ubuntu() { | ||||
|  | ||||
| function install_rhel() { | ||||
|     . /etc/os-release | ||||
|     if [[ "${ID}" == "rhel" ]]; then | ||||
|         if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then | ||||
|             echo "RHEL version ${VERSION_ID} not supported" | ||||
|             exit | ||||
|         fi | ||||
|     elif [[ "${ID}" == "almalinux" ]]; then | ||||
|         # Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64 | ||||
|         VERSION_ID="8.8" | ||||
|  | ||||
|     if [[ ! " 8.8 8.10 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then | ||||
|         echo "RHEL version ${VERSION_ID} not supported" | ||||
|         exit | ||||
|     fi | ||||
|  | ||||
|     dnf install -y 'dnf-command(config-manager)' | ||||
| @ -150,11 +146,11 @@ if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then | ||||
|     XPU_DRIVER_VERSION="/lts/2350" | ||||
| fi | ||||
|  | ||||
| # Default use Intel® oneAPI Deep Learning Essentials 2025.1 | ||||
| if [[ "$XPU_VERSION" == "2025.2" ]]; then | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.2" | ||||
| else | ||||
| # Default use Intel® oneAPI Deep Learning Essentials 2025.0 | ||||
| if [[ "$XPU_VERSION" == "2025.1" ]]; then | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.1" | ||||
| else | ||||
|     XPU_PACKAGES="intel-deep-learning-essentials-2025.0" | ||||
| fi | ||||
|  | ||||
| # The installation depends on the base OS | ||||
|  | ||||
| @ -69,11 +69,6 @@ RUN bash ./install_cuda.sh 12.9 | ||||
| RUN bash ./install_magma.sh 12.9 | ||||
| RUN ln -sf /usr/local/cuda-12.9 /usr/local/cuda | ||||
|  | ||||
| FROM cuda as cuda13.0 | ||||
| RUN bash ./install_cuda.sh 13.0 | ||||
| RUN bash ./install_magma.sh 13.0 | ||||
| RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda | ||||
|  | ||||
| FROM cpu as rocm | ||||
| ARG ROCM_VERSION | ||||
| ARG PYTORCH_ROCM_ARCH | ||||
|  | ||||
| @ -175,6 +175,6 @@ ENV XPU_DRIVER_TYPE ROLLING | ||||
| RUN python3 -m pip install --upgrade pip && \ | ||||
|     python3 -mpip install cmake==3.28.4 | ||||
| ADD ./common/install_xpu.sh install_xpu.sh | ||||
| ENV XPU_VERSION 2025.2 | ||||
| ENV XPU_VERSION 2025.1 | ||||
| RUN bash ./install_xpu.sh && rm install_xpu.sh | ||||
| RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd | ||||
|  | ||||
| @ -67,12 +67,6 @@ case ${image} in | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     manylinux2_28-builder:cuda13*) | ||||
|         TARGET=cuda_final | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|         DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13" | ||||
|         MANY_LINUX_VERSION="2_28" | ||||
|         ;; | ||||
|     manylinuxaarch64-builder:cuda*) | ||||
|         TARGET=cuda_final | ||||
|         GPU_IMAGE=amd64/almalinux:8 | ||||
|  | ||||
| @ -263,6 +263,11 @@ scipy==1.14.1 ; python_version >= "3.12" | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| tb-nightly==2.13.0a20230426 | ||||
| #Description: TensorBoard | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| # needed by torchgen utils | ||||
| typing-extensions>=4.10.0 | ||||
| #Description: type hints for python | ||||
| @ -339,7 +344,7 @@ onnx==1.18.0 | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
|  | ||||
| onnxscript==0.4.0 | ||||
| onnxscript==0.3.1 | ||||
| #Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal | ||||
| #Pinned versions: | ||||
| #test that import: | ||||
| @ -379,7 +384,7 @@ dataclasses_json==0.6.7 | ||||
| cmake==4.0.0 | ||||
| #Description: required for building | ||||
|  | ||||
| tlparse==0.4.0 | ||||
| tlparse==0.3.30 | ||||
| #Description: required for log parsing | ||||
|  | ||||
| cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x" | ||||
|  | ||||
| @ -1,7 +1,7 @@ | ||||
| sphinx==5.3.0 | ||||
| #Description: This is used to generate PyTorch docs | ||||
| #Pinned versions: 5.3.0 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@1657ad2fc1acdc98aa719eebecbb0128a7c13ce4#egg=pytorch_sphinx_theme2 | ||||
| -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@722b7e6f9ca512fcc526ad07d62b3d28c50bb6cd#egg=pytorch_sphinx_theme2 | ||||
|  | ||||
| # TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering | ||||
| # but it doesn't seem to work and hangs around idly. The initial thought that it is probably | ||||
|  | ||||
| @ -1,155 +0,0 @@ | ||||
| # Cross-compilation Docker container for RISC-V architecture | ||||
| ARG UBUNTU_VERSION | ||||
| FROM --platform=linux/amd64 ubuntu:${UBUNTU_VERSION} as base | ||||
|  | ||||
| ARG UBUNTU_VERSION | ||||
|  | ||||
| ENV GCC_VERSION=14 | ||||
| ENV PYTHON_VERSION=3.12.3 | ||||
| ENV DEBIAN_FRONTEND=noninteractive | ||||
| ENV CC=riscv64-linux-gnu-gcc-${GCC_VERSION} | ||||
| ENV CXX=riscv64-linux-gnu-g++-${GCC_VERSION} | ||||
| ENV QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/ | ||||
| ENV SYSROOT=/opt/sysroot | ||||
|  | ||||
| # Install basic dependencies | ||||
| RUN apt-get update && apt-get install -y \ | ||||
|     ninja-build \ | ||||
|     autoconf \ | ||||
|     automake \ | ||||
|     libtool \ | ||||
|     patchelf \ | ||||
|     ccache \ | ||||
|     git \ | ||||
|     wget \ | ||||
|     python3-pip \ | ||||
|     python3-venv \ | ||||
|     python-is-python3 \ | ||||
|     cmake \ | ||||
|     sudo \ | ||||
|     lsb-release \ | ||||
|     gcc-${GCC_VERSION}-riscv64-linux-gnu \ | ||||
|     g++-${GCC_VERSION}-riscv64-linux-gnu \ | ||||
|     pkg-config \ | ||||
|     && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| # Install user | ||||
| COPY ./common/install_user.sh install_user.sh | ||||
| RUN bash ./install_user.sh && rm install_user.sh | ||||
|  | ||||
| FROM base as python | ||||
| ARG ZLIB_VERSION=1.3.1 | ||||
| ARG FFI_VERSION=3.4.6 | ||||
| ARG BZ2_VERSION=1.0.8 | ||||
| ARG XZ_VERSION=5.4.6 | ||||
| ARG OPENSSL_VERSION=3.2.1 | ||||
|  | ||||
| # Set up sysroot directory for dependencies | ||||
| ENV PKG_CONFIG_PATH=${SYSROOT}/lib/pkgconfig | ||||
| ENV PKG_CONFIG_SYSROOT_DIR=${SYSROOT} | ||||
|  | ||||
| WORKDIR /opt | ||||
|  | ||||
| # Build zlib (for compression) | ||||
| RUN echo "--- Building zlib ---" \ | ||||
|     && wget -c https://www.zlib.net/zlib-${ZLIB_VERSION}.tar.gz \ | ||||
|     && tar -xf zlib-${ZLIB_VERSION}.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd zlib-${ZLIB_VERSION}/ \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../configure --prefix=${SYSROOT} \ | ||||
|     && make -j$(nproc) && make install \ | ||||
|     && cd ../.. | ||||
|  | ||||
| # Build libffi (for ctypes module) | ||||
| RUN echo "--- Building libffi ---" \ | ||||
|     && wget -c https://github.com/libffi/libffi/releases/download/v${FFI_VERSION}/libffi-${FFI_VERSION}.tar.gz \ | ||||
|     && tar -xf libffi-${FFI_VERSION}.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd libffi-${FFI_VERSION}/ \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \ | ||||
|     && make -j$(nproc) && make install \ | ||||
|     && cd ../.. | ||||
|  | ||||
| # Build bzip2 (for bz2 module) | ||||
| RUN echo "--- Building bzip2 ---" \ | ||||
|     && wget -c https://sourceware.org/pub/bzip2/bzip2-${BZ2_VERSION}.tar.gz \ | ||||
|     && tar -xf bzip2-${BZ2_VERSION}.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd bzip2-${BZ2_VERSION}/ \ | ||||
|     && make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} bzip2 bzip2recover libbz2.a \ | ||||
|     && make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} -f Makefile-libbz2_so \ | ||||
|     && make install PREFIX=${SYSROOT} \ | ||||
|     && cp libbz2.so.${BZ2_VERSION} ${SYSROOT}/lib/ \ | ||||
|     && cd ${SYSROOT}/lib/ \ | ||||
|     && ln -sf libbz2.so.${BZ2_VERSION} libbz2.so.1.0 \ | ||||
|     && ln -sf libbz2.so.1.0 libbz2.so \ | ||||
|     && cd /opt/ | ||||
|  | ||||
| # Build xz (for lzma module) | ||||
| RUN echo "--- Building xz ---" \ | ||||
|     && wget -c https://github.com/tukaani-project/xz/releases/download/v${XZ_VERSION}/xz-${XZ_VERSION}.tar.gz \ | ||||
|     && tar -xf xz-${XZ_VERSION}.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd xz-${XZ_VERSION} \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \ | ||||
|     && make -j$(nproc) && make install \ | ||||
|     && cd ../.. | ||||
|  | ||||
| # Build OpenSSL (for ssl module) | ||||
| RUN echo "--- Building OpenSSL ---" \ | ||||
|     && wget -c https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz \ | ||||
|     && tar -xf openssl-${OPENSSL_VERSION}.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd openssl-${OPENSSL_VERSION}/ \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../Configure linux64-riscv64 --prefix=${SYSROOT} \ | ||||
|     && make -j$(nproc) && make install_sw \ | ||||
|     && cd ../.. | ||||
|  | ||||
| # Build SQLite3 (for sqlite3 module) | ||||
| RUN echo "--- Building SQLite3 ---" \ | ||||
|     && wget -c https://www.sqlite.org/2024/sqlite-autoconf-3450200.tar.gz \ | ||||
|     && tar -xf sqlite-autoconf-3450200.tar.gz --no-same-permissions --no-same-owner \ | ||||
|     && cd sqlite-autoconf-3450200 \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \ | ||||
|     && make -j$(nproc) && make install \ | ||||
|     && cd ../.. | ||||
|  | ||||
| # Build and install RISC-V Python with all modules | ||||
| RUN wget -c https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \ | ||||
|     && tar -xf Python-${PYTHON_VERSION}.tgz --no-same-permissions --no-same-owner \ | ||||
|     && cd Python-${PYTHON_VERSION} \ | ||||
|     && mkdir build && cd build \ | ||||
|     && ../configure \ | ||||
|         --host=riscv64-linux-gnu \ | ||||
|         --build=x86_64-linux-gnu \ | ||||
|         --prefix=${SYSROOT} \ | ||||
|         --enable-shared \ | ||||
|         --disable-ipv6 \ | ||||
|         --with-build-python=/usr/bin/python3 \ | ||||
|         --with-ensurepip=no \ | ||||
|         ac_cv_file__dev_ptmx=yes \ | ||||
|         ac_cv_file__dev_ptc=no \ | ||||
|     && make -j$(nproc) \ | ||||
|     && make install | ||||
|  | ||||
| FROM base as final | ||||
| COPY --from=python             /opt/sysroot                       /opt/sysroot | ||||
|  | ||||
| # Install crossenv and cmake | ||||
| RUN pip install crossenv cmake==4.0.0 --break-system-packages \ | ||||
|     && /usr/bin/python3 -m crossenv ${SYSROOT}/bin/python3 /opt/riscv-cross-env | ||||
|  | ||||
| # Add pip-installed cmake binaries to PATH | ||||
| ENV PATH="/usr/local/bin:${PATH}" | ||||
|  | ||||
| # Set up cross Python environment | ||||
| SHELL ["/bin/bash", "-c"] | ||||
| RUN source /opt/riscv-cross-env/bin/activate \ | ||||
|     && pip install setuptools pyyaml typing_extensions wheel | ||||
|  | ||||
| # Set default environment variables for PyTorch build | ||||
| ENV Python_ROOT_DIR=${SYSROOT} | ||||
| ENV OPENSSL_ROOT_DIR=${SYSROOT} | ||||
|  | ||||
| USER jenkins | ||||
| CMD ["bash"] | ||||
| @ -96,11 +96,11 @@ ARG ANACONDA_PYTHON_VERSION | ||||
| ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION | ||||
| COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt | ||||
| COPY ci_commit_pins/huggingface.txt huggingface.txt | ||||
| COPY ci_commit_pins/timm.txt timm.txt | ||||
| COPY ci_commit_pins/torchbench.txt torchbench.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt | ||||
|  | ||||
| # (optional) Install non-default Ninja version | ||||
| ARG NINJA_VERSION | ||||
|  | ||||
| @ -56,10 +56,10 @@ RUN rm install_openssl.sh | ||||
| ARG INDUCTOR_BENCHMARKS | ||||
| COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt | ||||
| COPY ci_commit_pins/huggingface.txt huggingface.txt | ||||
| COPY ci_commit_pins/timm.txt timm.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt | ||||
|  | ||||
| # Install XPU Dependencies | ||||
| ARG XPU_VERSION | ||||
|  | ||||
| @ -66,7 +66,6 @@ ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/" | ||||
| # (optional) Install UCC | ||||
| ARG UCX_COMMIT | ||||
| ARG UCC_COMMIT | ||||
| ARG CUDA_VERSION | ||||
| ENV UCX_COMMIT $UCX_COMMIT | ||||
| ENV UCC_COMMIT $UCC_COMMIT | ||||
| ENV UCX_HOME /usr | ||||
| @ -97,11 +96,11 @@ RUN rm install_openssl.sh | ||||
| ARG INDUCTOR_BENCHMARKS | ||||
| COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh | ||||
| COPY ./common/common_utils.sh common_utils.sh | ||||
| COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt | ||||
| COPY ci_commit_pins/huggingface.txt huggingface.txt | ||||
| COPY ci_commit_pins/timm.txt timm.txt | ||||
| COPY ci_commit_pins/torchbench.txt torchbench.txt | ||||
| RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt | ||||
| RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface.txt torchbench.txt | ||||
|  | ||||
| ARG TRITON | ||||
| ARG TRITON_CPU | ||||
| @ -182,6 +181,7 @@ COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm | ||||
| RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi | ||||
|  | ||||
| # AWS specific CUDA build guidance | ||||
| ENV TORCH_CUDA_ARCH_LIST Maxwell | ||||
| ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all" | ||||
| ENV CUDA_PATH /usr/local/cuda | ||||
|  | ||||
|  | ||||
| @ -7,4 +7,4 @@ set -ex | ||||
|  | ||||
| SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" | ||||
|  | ||||
| USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh | ||||
| USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.9" ${SCRIPTPATH}/../manywheel/build.sh | ||||
|  | ||||
| @ -1,31 +0,0 @@ | ||||
| # 🔧 Lumen_cli | ||||
| A Python CLI tool for building and testing PyTorch-based components, using a YAML configuration file for structured, repeatable workflows. | ||||
|  | ||||
|  | ||||
| ## Features | ||||
| - **Build** | ||||
|     - external projects (e.g. vLLM) | ||||
|  | ||||
| ## 📦 Installation | ||||
| at the root of the pytorch repo | ||||
| ```bash | ||||
| pip install -e .ci/lumen_cli | ||||
| ``` | ||||
|  | ||||
| ## Run the cli tool | ||||
| The CLI tool must be run from the root of the PyTorch repo. For example, to build the external vLLM target: | ||||
| ```bash | ||||
| python -m cli.run build external vllm | ||||
| ``` | ||||
| This runs the build steps with the default behaviour for the vLLM project. | ||||
|  | ||||
| to see help messages, run | ||||
| ```bash | ||||
| python3 -m cli.run --help | ||||
| ``` | ||||
|  | ||||
| ## Add customized external build logics | ||||
| To add the build logic for a new external target: | ||||
| 1. create the build function in cli/lib folder | ||||
| 2. register your target and the main build function at  EXTERNAL_BUILD_TARGET_DISPATCH in `cli/build_cli/register_build.py` | ||||
| 3. [optional] create your ci config file in .github/ci_configs/${EXTERNAL_PACKAGE_NAME}.yaml | ||||
| @ -1,37 +0,0 @@ | ||||
| import argparse | ||||
| import logging | ||||
|  | ||||
| from cli.lib.common.cli_helper import register_targets, RichHelp, TargetSpec | ||||
| from cli.lib.core.vllm.vllm_build import VllmBuildRunner | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # Maps each target name to its runner class and argparse help text. | ||||
| # Each entry becomes reachable as `python -m cli.run build external {target}`. | ||||
| _TARGETS: dict[str, TargetSpec] = { | ||||
|     "vllm": { | ||||
|         "runner": VllmBuildRunner, | ||||
|         "help": "Build vLLM using docker buildx.", | ||||
|     } | ||||
|     # add yours ... | ||||
| } | ||||
|  | ||||
|  | ||||
def register_build_commands(subparsers: argparse._SubParsersAction) -> None:
    """Attach the ``build`` command tree to the given subparsers action.

    Adds a ``build`` parser with a required ``build_command`` sub-command,
    adds ``external`` beneath it, and registers every entry of ``_TARGETS``
    as a target of ``build external`` via ``register_targets``.
    """
    build_parser = subparsers.add_parser(
        "build", help="Build related commands", formatter_class=RichHelp
    )
    build_subparsers = build_parser.add_subparsers(dest="build_command", required=True)

    # One overview line per target: name padded to 12 columns, then its help text.
    target_lines = []
    for target_name, target_spec in _TARGETS.items():
        target_lines.append(f"  {target_name:12} {target_spec.get('help', '')}")
    overview = "\n".join(target_lines)

    external_parser = build_subparsers.add_parser(
        "external",
        help="Build external targets",
        description="Build third-party targets.\n\nAvailable targets:\n" + overview,
        formatter_class=RichHelp,
    )
    register_targets(external_parser, _TARGETS)
| @ -1,71 +0,0 @@ | ||||
| """ | ||||
| Cli Argparser Utility helpers for CLI tasks. | ||||
|  | ||||
| """ | ||||
|  | ||||
| import argparse | ||||
| from abc import ABC, abstractmethod | ||||
|  | ||||
|  | ||||
| try: | ||||
|     from typing import Any, Callable, Required, TypedDict  # Python 3.11+ | ||||
| except ImportError: | ||||
|     from typing import Any, Callable, TypedDict | ||||
|  | ||||
|     from typing_extensions import Required  # Fallback for Python <3.11 | ||||
|  | ||||
|  | ||||
class BaseRunner(ABC):
    """Abstract base for CLI runners: stores the parsed args and exposes ``run``."""

    def __init__(self, args: Any) -> None:
        # Kept verbatim so subclasses can read whatever the caller passed in.
        self.args = args

    @abstractmethod
    def run(self) -> None:
        """Execute the runner's main logic; every concrete subclass must override this."""
|  | ||||
|  | ||||
class RichHelp(
    argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter
):
    """Help formatter that keeps description newlines and shows argument defaults."""
|  | ||||
|  | ||||
class TargetSpec(TypedDict, total=False):
    """Specification of one CLI target subcommand.

    Only ``runner`` is required; every other key is optional (``total=False``).
    """

    # Runner class for the target; must be a BaseRunner subclass.
    runner: Required[type[BaseRunner]]
    # Short help text for the target.
    help: str
    # Longer description for the target.
    description: str
    # Optional hook that receives an ArgumentParser so it can add arguments.
    add_arguments: Callable[[argparse.ArgumentParser], None]
|  | ||||
|  | ||||
def register_targets(
    parser: argparse.ArgumentParser,
    target_specs: dict[str, TargetSpec],
    common_args: Callable[[argparse.ArgumentParser], None] = lambda _: None,
) -> None:
    """Add one required ``target`` subcommand per entry of ``target_specs``.

    Args:
        parser: Parser that receives the ``target`` subparsers.
        target_specs: Mapping of target name to its specification.
        common_args: Hook applied to every target parser after the
            target-specific ``add_arguments`` hook; defaults to a no-op.

    Each target parser gets two defaults: ``func``, which instantiates the
    runner with the parsed args and calls ``run()``, and ``_runner_class``.
    """
    subcommands = parser.add_subparsers(
        dest="target",
        required=True,
        metavar="{" + ",".join(target_specs) + "}",
    )

    for target_name, target_spec in target_specs.items():
        # Explicit description wins; otherwise fall back to the runner's docstring.
        description = (
            target_spec.get("description") or target_spec["runner"].__doc__ or ""
        )

        target_parser = subcommands.add_parser(
            target_name,
            help=target_spec.get("help", ""),
            description=description.strip(),
            formatter_class=RichHelp,
        )
        runner_cls = target_spec["runner"]
        target_parser.set_defaults(
            # Bind the class as a default argument so each target keeps its own runner.
            func=lambda args, cls=runner_cls: cls(args).run(),
            _runner_class=runner_cls,
        )

        extra_arguments = target_spec.get("add_arguments")
        if callable(extra_arguments):
            extra_arguments(target_parser)
        if common_args:
            common_args(target_parser)
| @ -1,42 +0,0 @@ | ||||
| """ | ||||
| Docker Utility helpers for CLI tasks. | ||||
| """ | ||||
|  | ||||
| import logging | ||||
| from typing import Optional | ||||
|  | ||||
| import docker | ||||
| from docker.errors import APIError, NotFound | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| # lazy singleton so we don't reconnect every call | ||||
| _docker_client: Optional[docker.DockerClient] = None | ||||
|  | ||||
|  | ||||
def _get_client() -> docker.DockerClient:
    """Return the module-wide Docker client, creating it from the environment on first use."""
    global _docker_client
    if _docker_client is not None:
        return _docker_client
    _docker_client = docker.from_env()
    return _docker_client
|  | ||||
|  | ||||
def local_image_exists(
    image_name: str, client: Optional[docker.DockerClient] = None
) -> bool:
    """Report whether ``client.images.get(image_name)`` succeeds.

    Args:
        image_name: Image name to look up; an empty value returns False
            without contacting Docker.
        client: Docker client to use; defaults to the shared module client.

    Returns:
        True when the lookup succeeds. False when it raises ``NotFound`` or
        ``APIError``; the failure is logged at error level.
    """
    if not image_name:
        return False

    docker_client = client or _get_client()
    try:
        docker_client.images.get(image_name)
    except (NotFound, APIError) as err:
        # Prefer the exception's ``explanation`` attribute when it has one.
        detail = err.explanation if hasattr(err, "explanation") else str(err)
        logger.error(
            "Error when checking Docker image '%s': %s",
            image_name,
            detail,
        )
        return False
    return True
| @ -1,110 +0,0 @@ | ||||
| """ | ||||
| Environment Variables and Dataclasses Utility helpers for CLI tasks. | ||||
| """ | ||||
|  | ||||
| import os | ||||
| from dataclasses import field, fields, is_dataclass, MISSING | ||||
| from pathlib import Path | ||||
| from textwrap import indent | ||||
| from typing import Optional, Union | ||||
|  | ||||
| from cli.lib.common.utils import str2bool | ||||
|  | ||||
|  | ||||
def get_env(name: str, default: str = "") -> str:
    """Return the environment variable ``name``, or ``default`` when unset or empty."""
    value = os.environ.get(name)
    return value if value else default
|  | ||||
|  | ||||
def env_path_optional(
    name: str,
    default: Optional[Union[str, Path]] = None,
    resolve: bool = True,
) -> Optional[Path]:
    """Read environment variable ``name`` as a Path, or None when nothing is set.

    Falls back to ``default`` when the variable is unset or empty. The
    result is resolved to an absolute path unless ``resolve`` is False.
    """
    raw = get_env(name) or default
    if raw:
        candidate = Path(raw)
        if resolve:
            return candidate.resolve()
        return candidate
    return None
|  | ||||
|  | ||||
def env_path(
    name: str,
    default: Optional[Union[str, Path]] = None,
    resolve: bool = True,
) -> Path:
    """Read environment variable ``name`` as a Path.

    Raises:
        ValueError: If neither the variable nor ``default`` provides a value.
    """
    found = env_path_optional(name, default, resolve)
    if found:
        return found
    raise ValueError(f"Missing path value for {name}")
|  | ||||
|  | ||||
def env_bool(
    name: str,
    default: bool = False,
) -> bool:
    """Read environment variable ``name`` as a bool via ``str2bool``.

    Returns ``default`` when the variable is unset or empty.
    """
    raw = get_env(name)
    return str2bool(raw) if raw else default
|  | ||||
|  | ||||
def env_bool_field(
    name: str,
    default: bool = False,
):
    """Build a dataclass field whose default is ``env_bool(name, default)``.

    The environment is read each time the default factory runs, not when
    this function is called.
    """

    def _read_env() -> bool:
        return env_bool(name, default)

    return field(default_factory=_read_env)
|  | ||||
|  | ||||
def env_path_field(
    name: str,
    default: Union[str, Path] = "",
    *,
    resolve: bool = True,
) -> Path:
    """Build a dataclass field whose default is ``env_path(name, default, resolve=resolve)``.

    The environment is read each time the default factory runs. The return
    annotation is ``Path`` although the object returned here is a dataclass
    field descriptor.
    """

    def _read_env() -> Path:
        return env_path(name, default, resolve=resolve)

    return field(default_factory=_read_env)
|  | ||||
|  | ||||
def env_str_field(
    name: str,
    default: str = "",
) -> str:
    """Build a dataclass field whose default is ``get_env(name, default)``.

    The environment is read each time the default factory runs. The return
    annotation is ``str`` although the object returned here is a dataclass
    field descriptor.
    """

    def _read_env() -> str:
        return get_env(name, default)

    return field(default_factory=_read_env)
|  | ||||
|  | ||||
def generate_dataclass_help(cls) -> str:
    """Render one ``name = default`` line per dataclass field, indented four spaces.

    Field names are left-aligned in a 22-character column and values are
    shown with ``repr``. A field with a default factory shows the factory's
    result (the factory is called here), or ``<error: ...>`` if it raises.
    A field without any default shows ``<required>``.

    Raises:
        TypeError: If ``cls`` is not a dataclass.
    """
    if not is_dataclass(cls):
        raise TypeError(f"{cls} is not a dataclass")

    def _default_of(spec):
        if spec.default is not MISSING:
            return spec.default
        if spec.default_factory is MISSING:
            return "<required>"
        try:
            return spec.default_factory()
        except Exception as exc:
            # Help generation must not fail just because a factory does.
            return f"<error: {exc}>"

    rows = []
    for spec in fields(cls):
        rows.append(f"{spec.name:<22} = {_default_of(spec)!r}")
    return indent("\n".join(rows), "    ")
|  | ||||
|  | ||||
def with_params_help(params_cls: type, title: str = "Parameter defaults"):
    """Return a class decorator that appends a parameter table to a docstring.

    The table is produced by ``generate_dataclass_help(params_cls)`` when the
    decorator is applied, and is added to the decorated class's ``__doc__``
    under a ``{title}:`` heading.

    Raises:
        TypeError: If ``params_cls`` is not a dataclass.
    """
    if not is_dataclass(params_cls):
        raise TypeError(f"{params_cls} must be a dataclass")

    def _decorator(cls: type) -> type:
        help_block = generate_dataclass_help(params_cls)
        existing_doc = cls.__doc__ or ""
        cls.__doc__ = existing_doc + f"\n\n{title}:\n{help_block}"
        return cls

    return _decorator
| @ -1,143 +0,0 @@ | ||||
| from __future__ import annotations | ||||
|  | ||||
| import logging | ||||
| import os | ||||
| import textwrap | ||||
| from pathlib import Path | ||||
| from typing import TYPE_CHECKING | ||||
|  | ||||
| from cli.lib.common.utils import get_wheels | ||||
| from jinja2 import Template | ||||
|  | ||||
|  | ||||
| if TYPE_CHECKING: | ||||
|     from collections.abc import Iterable, Mapping | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
| _TPL_CONTENT = Template( | ||||
|     textwrap.dedent("""\ | ||||
|     ## {{ title }} | ||||
|  | ||||
|     ```{{ lang }} | ||||
|     {{ content }} | ||||
|     ``` | ||||
| """) | ||||
| ) | ||||
|  | ||||
| _TPL_LIST_ITEMS = Template( | ||||
|     textwrap.dedent("""\ | ||||
|     ## {{ title }} | ||||
|     {% for it in items %} | ||||
|     - {{ it.pkg }}: {{ it.relpath }} | ||||
|     {% else %} | ||||
|     _(no item found)_ | ||||
|     {% endfor %} | ||||
|     """) | ||||
| ) | ||||
|  | ||||
| _TPL_TABLE = Template( | ||||
|     textwrap.dedent("""\ | ||||
|     {%- if rows %} | ||||
|     | {{ cols | join(' | ') }} | | ||||
|     |{%- for _ in cols %} --- |{%- endfor %} | ||||
|     {%- for r in rows %} | ||||
|     | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %} | ||||
|     {%- endfor %} | ||||
|     {%- else %} | ||||
|     _(no data)_ | ||||
|     {%- endif %} | ||||
| """) | ||||
| ) | ||||
|  | ||||
|  | ||||
def gh_summary_path() -> Path | None:
    """Return ``GITHUB_STEP_SUMMARY`` as a Path, or None when it is unset or empty."""
    raw = os.environ.get("GITHUB_STEP_SUMMARY")
    if not raw:
        return None
    return Path(raw)
|  | ||||
|  | ||||
def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
    """Write Markdown to the file named by ``GITHUB_STEP_SUMMARY``.

    The text is dedented, stripped, and terminated with a single newline
    before it is written as UTF-8.

    Args:
        md: Markdown content to write.
        append_content: Append to the file when True (the default);
            overwrite the whole file when False.

    Returns:
        True if the content was written; False if the summary path is not
        set (for example when running locally), in which case nothing is
        written.
    """
    summary_file = gh_summary_path()
    if not summary_file:
        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
        return False

    text = textwrap.dedent(md).strip() + "\n"
    with summary_file.open("a" if append_content else "w", encoding="utf-8") as handle:
        handle.write(text)
    return True
|  | ||||
|  | ||||
def md_heading(text: str, level: int = 2) -> str:
    """Return a Markdown heading line for ``text``; ``level`` is clamped to 1-6."""
    if level < 1:
        depth = 1
    elif level > 6:
        depth = 6
    else:
        depth = level
    return f"{'#' * depth} {text}\n"
|  | ||||
|  | ||||
def md_details(summary: str, content: str) -> str:
    """Return a collapsible ``<details>`` block wrapping ``content`` under ``summary``."""
    pieces = [
        "<details>",
        f"<summary>{summary}</summary>",
        "",  # blank line before the content
        content,
        "",  # blank line after the content
        "</details>",
        "",  # trailing newline
    ]
    return "\n".join(pieces)
|  | ||||
|  | ||||
def summarize_content_from_file(
    output_dir: Path,
    freeze_file: str,
    title: str = "Content from file",
    code_lang: str = "",  # e.g. "text" or "ini"
) -> bool:
    """Write the contents of ``output_dir / freeze_file`` to the step summary.

    The file is read as UTF-8, stripped, and rendered as a titled code block
    in ``code_lang``.

    Returns:
        False if the file does not exist; otherwise the result of
        ``write_gh_step_summary``.
    """
    source = Path(output_dir) / freeze_file
    if source.exists():
        body = source.read_text(encoding="utf-8").strip()
        rendered = render_content(body, title=title, lang=code_lang)
        return write_gh_step_summary(rendered)
    return False
|  | ||||
|  | ||||
def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3):
    """Write a list of the wheels returned by ``get_wheels`` to the step summary.

    Returns:
        False if ``get_wheels(path, max_depth=max_depth)`` returns nothing;
        otherwise the result of ``write_gh_step_summary``.
    """
    wheels = get_wheels(path, max_depth=max_depth)
    if wheels:
        return write_gh_step_summary(render_list(wheels, title=title))
    return False
|  | ||||
|  | ||||
def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
    """Render a list of dicts as a Markdown table using the Jinja template.

    Args:
        rows: Row mappings; a row may omit keys that other rows have.

    Returns:
        The rendered table, stripped and terminated with one newline. The
        columns are the union of all row keys in first-seen order, so the
        same input always yields the same column layout.
    """
    rows = list(rows)
    # dict.fromkeys de-duplicates while keeping insertion order. Collecting the
    # keys in a set made the column order arbitrary and, with string hash
    # randomization, different from one interpreter run to the next.
    cols = list(dict.fromkeys(key for row in rows for key in row))
    return _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
|  | ||||
|  | ||||
def render_list(
    items: Iterable[str],
    *,
    title: str = "List",
) -> str:
    """Render ``items`` under a ``title`` heading with the list-items template.

    NOTE(review): the template reads ``pkg`` and ``relpath`` from each item,
    so items look like mappings or objects rather than the plain strings the
    annotation suggests; confirm against callers.
    """
    return _TPL_LIST_ITEMS.render(title=title, items=items)
|  | ||||
|  | ||||
def render_content(
    content: str,
    *,
    title: str = "Content",
    lang: str = "text",
) -> str:
    """Render ``content`` as a fenced ``lang`` code block under a ``title`` heading."""
    return _TPL_CONTENT.render(title=title, content=content, lang=lang)
| @ -1,69 +0,0 @@ | ||||
| """ | ||||
| Git Utility helpers for CLI tasks. | ||||
| """ | ||||
|  | ||||
| import logging | ||||
| from pathlib import Path | ||||
|  | ||||
| from cli.lib.common.path_helper import remove_dir | ||||
| from git import GitCommandError, RemoteProgress, Repo | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
class PrintProgress(RemoteProgress):
    """Progress handler for git operations that reports through the module logger."""

    def __init__(self, interval: int = 5):
        """Store the reporting interval: percentages are logged at multiples of it."""
        super().__init__()
        self._interval = interval
        # Last percentage logged; -1 means nothing has been logged yet.
        self._last_percent = -1

    def update(self, op_code, cur, max=None, message=""):
        """Log progress for one update callback.

        When both ``cur`` and ``max`` are truthy, a percentage is logged,
        but only if it differs from the last one logged and is a multiple
        of the interval. Otherwise any available text is logged as-is.
        """
        # ``_cur_line`` is presumably maintained by RemoteProgress; confirm upstream.
        text = self._cur_line or message
        if not (max and cur):
            if text:
                logger.info(text)
            return

        pct = int(cur / max * 100)
        if pct != self._last_percent and pct % self._interval == 0:
            self._last_percent = pct
            logger.info("Progress: %d%% - %s", pct, text)
|  | ||||
|  | ||||
def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules=False):
    """Clone ``repo`` into ``dst`` and check out the commit pinned for ``target``.

    Args:
        target: Name used to look up the pinned commit and in log messages.
        repo: Repository location passed to ``Repo.clone_from``.
        dst: Destination directory; defaults to ``target``. Any existing
            directory there is removed first.
        update_submodules: When true, initialise and recursively update each
            submodule of the cloned repository.

    Returns:
        A ``(repo_object, commit)`` tuple.

    Raises:
        GitCommandError: Logged and re-raised when a git operation fails.
    """
    dst = dst or target

    try:
        logger.info("Cloning %s to %s", target, dst)

        # Start from a clean destination, then clone and fetch all refs and tags.
        remove_dir(dst)
        cloned = Repo.clone_from(repo, dst, progress=PrintProgress())
        cloned.git.fetch("--all", "--tags")

        # Move the working tree to the pinned commit.
        pinned = get_post_build_pinned_commit(target)
        logger.info("Checking out pinned %s commit %s", target, pinned)
        cloned.git.checkout(pinned)

        # Submodules are only touched on request and when there are any.
        if update_submodules and cloned.submodules:
            logger.info("Updating %d submodule(s)", len(cloned.submodules))
            for submodule in cloned.submodules:
                submodule.update(init=True, recursive=True, progress=PrintProgress())

        logger.info("Successfully cloned %s", target)
        return cloned, pinned

    except GitCommandError as err:
        logger.error("Git operation failed: %s", err)
        raise
|  | ||||
|  | ||||
def get_post_build_pinned_commit(name: str, prefix=".github/ci_commit_pins") -> str:
    """Return the stripped contents of the pin file ``{prefix}/{name}.txt``.

    Raises:
        FileNotFoundError: If the pin file does not exist.
    """
    pin_file = Path(prefix) / f"{name}.txt"
    if pin_file.exists():
        return pin_file.read_text(encoding="utf-8").strip()
    raise FileNotFoundError(f"Pin file not found: {pin_file}")
| @ -1,14 +0,0 @@ | ||||
| """ | ||||
| Logger Utility helpers for CLI tasks. | ||||
| """ | ||||
|  | ||||
| import logging | ||||
| import sys | ||||
|  | ||||
|  | ||||
def setup_logging(level: int = logging.INFO):
    """Configure root logging to stdout with a timestamped ``[LEVEL] name: message`` format."""
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    logging.basicConfig(stream=sys.stdout, format=log_format, level=level)
| @ -1,62 +0,0 @@ | ||||
| """Path utility helpers for CLI tasks.""" | ||||
|  | ||||
| import logging | ||||
| import shutil | ||||
| from pathlib import Path | ||||
| from typing import Union | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
def get_path(path: Union[str, Path], resolve: bool = False) -> Path:
    """Return ``path`` as a Path, resolved to an absolute path when ``resolve`` is true.

    Raises:
        ValueError: If ``path`` is None or empty.
    """
    if path:
        as_path = Path(path)
        if resolve:
            return as_path.resolve()
        return as_path
    raise ValueError("Path cannot be None or empty")
|  | ||||
|  | ||||
def ensure_dir_exists(path: Union[str, Path]) -> Path:
    """Create the directory (and any missing parents) if needed and return it as a Path."""
    directory = get_path(path)
    directory.mkdir(exist_ok=True, parents=True)
    return directory
|  | ||||
|  | ||||
def remove_dir(path: Union[str, Path, None]) -> None:
    """Recursively delete ``path`` if it exists; do nothing for None, empty, or missing paths."""
    if path:
        target = get_path(path)
        if target.exists():
            shutil.rmtree(target)
|  | ||||
|  | ||||
def force_create_dir(path: Union[str, Path]) -> Path:
    """Delete ``path`` if present, recreate it as an empty directory, and return it."""
    remove_dir(path)
    fresh_dir = ensure_dir_exists(path)
    return fresh_dir
|  | ||||
|  | ||||
def copy(src: Union[str, Path], dst: Union[str, Path]) -> None:
    """Copy a file or a directory tree from ``src`` to ``dst``.

    Both paths are resolved first and the parent of ``dst`` is created if
    missing. Files are copied with ``shutil.copy2``; directories with
    ``shutil.copytree`` into a possibly existing destination.

    Raises:
        FileNotFoundError: If ``src`` does not exist.
        ValueError: If ``src`` is neither a regular file nor a directory.
    """
    source = get_path(src, resolve=True)
    destination = get_path(dst, resolve=True)

    if not source.exists():
        raise FileNotFoundError(f"Source does not exist: {source}")

    destination.parent.mkdir(parents=True, exist_ok=True)

    if source.is_file():
        shutil.copy2(source, destination)
        return
    if source.is_dir():
        shutil.copytree(source, destination, dirs_exist_ok=True)
        return
    raise ValueError(f"Unsupported path type: {source}")
|  | ||||
|  | ||||
def is_path_exist(path: Union[str, Path, None]) -> bool:
    """Return True only when ``path`` is non-empty and exists on disk."""
    if not path:
        return False
    return get_path(path).exists()
| @ -1,71 +0,0 @@ | ||||
| import glob | ||||
| import logging | ||||
| import shlex | ||||
| import shutil | ||||
| import sys | ||||
| from collections.abc import Iterable | ||||
| from importlib.metadata import PackageNotFoundError, version  # noqa: UP035 | ||||
| from typing import Optional, Union | ||||
|  | ||||
| from cli.lib.common.utils import run_command | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
def pip_install_packages(
    packages: Iterable[str] = (),
    env=None,
    *,
    requirements: Optional[str] = None,
    constraints: Optional[str] = None,
    prefer_uv: bool = False,
) -> None:
    """
    Install packages into the current interpreter's environment.

    Args:
        packages: Requirement specifiers appended to the install command.
        env: Extra environment passed through to ``run_command``.
        requirements: Optional requirements file, passed with ``-r``.
        constraints: Optional constraints file, passed with ``-c``.
        prefer_uv: Use ``python -m uv pip install`` instead of
            ``python -m pip install``, but only when a ``uv`` executable is
            found on PATH.
    """
    installer = "uv" if prefer_uv and shutil.which("uv") is not None else "pip"

    cmd = [sys.executable, "-m"]
    if installer == "uv":
        cmd.extend(["uv", "pip", "install"])
    else:
        cmd.extend(["pip", "install"])
    if requirements:
        cmd.extend(["-r", requirements])
    if constraints:
        cmd.extend(["-c", constraints])
    cmd.extend(packages)

    # run_command takes a single string, so every token is shell-quoted once
    # and the same rendering is used for both the log line and the execution.
    command_line = " ".join(shlex.quote(token) for token in cmd)
    logger.info("pip installing packages: %s", command_line)
    run_command(command_line, env=env)
|  | ||||
|  | ||||
def pip_install_first_match(pattern: str, extras: Optional[str] = None, pref_uv=False):
    """
    Install the first file matching the glob ``pattern``.

    Args:
        pattern: Glob pattern handed to ``first_matching_pkg``.
        extras: Optional extras appended to the target as ``[extras]``.
        pref_uv: Forwarded to ``pip_install_packages`` as ``prefer_uv``.
    """
    wheel = first_matching_pkg(pattern)
    if extras:
        target = f"{wheel}[{extras}]"
    else:
        target = wheel
    logger.info("Installing %s...", target)
    pip_install_packages([target], prefer_uv=pref_uv)
|  | ||||
|  | ||||
def run_python(args: Union[str, list[str]], env=None):
    """
    Invoke the current interpreter (``sys.executable``) with ``args``.

    A string is tokenised with ``shlex.split``; a list is used as is. The
    resulting command is shell-quoted and handed to ``run_command``.
    """
    argv = shlex.split(args) if isinstance(args, str) else args
    full_cmd = [sys.executable] + argv
    command_line = " ".join(shlex.quote(token) for token in full_cmd)
    run_command(command_line, env=env)
|  | ||||
|  | ||||
def pkg_exists(name: str) -> bool:
    """
    Report whether the distribution ``name`` is installed.

    Looks the package up through ``importlib.metadata.version`` and logs the
    outcome either way.
    """
    try:
        installed_version = version(name)
    except PackageNotFoundError:
        logger.info("%s is not installed", name)
        return False
    logger.info("%s already exist with version: %s", name, installed_version)
    return True
|  | ||||
|  | ||||
def first_matching_pkg(pattern: str) -> str:
    """
    Return the lexicographically smallest path matching the glob ``pattern``.

    Raises:
        FileNotFoundError: If nothing matches.
    """
    candidates = glob.glob(pattern)
    if candidates:
        # min() picks the same element that sorted(...)[0] would.
        return min(candidates)
    raise FileNotFoundError(f"No wheel matching: {pattern}")
| @ -1,139 +0,0 @@ | ||||
| """ | ||||
| General Utility helpers for CLI tasks. | ||||
| """ | ||||
|  | ||||
| import logging | ||||
| import os | ||||
| import shlex | ||||
| import subprocess | ||||
| import sys | ||||
| from contextlib import contextmanager | ||||
| from pathlib import Path | ||||
| from typing import Optional | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
def run_command(
    cmd: str,
    use_shell: bool = False,
    log_cmd: bool = True,
    cwd: Optional[str] = None,
    env: Optional[dict] = None,
    check: bool = True,
) -> int:
    """
    Run ``cmd`` as a subprocess and return its exit code.

    Args:
        cmd: Command line to execute.
        use_shell: When true the string is passed unchanged to ``/bin/bash``;
            otherwise it is tokenised with ``shlex.split`` and run directly.
        log_cmd: Log the command before running it.
        cwd: Working directory for the child process.
        env: Variables layered on top of the current ``os.environ``.
        check: Raise when the command exits with a non-zero status.

    Returns:
        The exit code of the process.

    Raises:
        subprocess.CalledProcessError: If ``check`` is true and the exit code
            is non-zero.
    """
    args = cmd if use_shell else shlex.split(cmd)
    log_prefix = "[shell]" if use_shell else "[cmd]"
    executable = "/bin/bash" if use_shell else None

    if log_cmd:
        logger.info("%s %s", log_prefix, cmd if use_shell else " ".join(args))

    # Caller-provided variables override the inherited environment.
    merged_env = dict(os.environ)
    merged_env.update(env or {})

    completed = subprocess.run(
        args,
        shell=use_shell,
        executable=executable,
        stdout=sys.stdout,
        stderr=sys.stderr,
        cwd=cwd,
        env=merged_env,
        check=False,
    )
    exit_code = completed.returncode

    if exit_code == 0 or not check:
        return exit_code

    logger.error("%s Command failed (exit %s): %s", log_prefix, exit_code, cmd)
    raise subprocess.CalledProcessError(exit_code, cmd if use_shell else args)
|  | ||||
|  | ||||
def str2bool(value: Optional[str]) -> bool:
    """
    Convert an environment-variable style string to a boolean.

    Matching is case-insensitive and ignores surrounding whitespace.

    Args:
        value: Text to interpret. Any falsy value (``None``, ``""``) is
            treated as ``False``.

    Returns:
        ``True`` or ``False`` according to the recognised spellings below.

    Raises:
        ValueError: If ``value`` is a truthy non-string, or a string that is
            not one of the recognised spellings.
    """
    if not value:
        return False
    if not isinstance(value, str):
        raise ValueError(
            f"Expected a string value for boolean conversion, got {type(value)}"
        )
    value = value.strip().lower()

    true_value_set = {"1", "true", "t", "yes", "y", "on", "enable", "enabled", "found"}
    # "disabled" mirrors "enabled" above; previously only "disable" was
    # accepted, so the natural counterpart of "enabled" raised ValueError.
    false_value_set = {"0", "false", "f", "no", "n", "off", "disable", "disabled"}

    if value in true_value_set:
        return True
    if value in false_value_set:
        return False
    raise ValueError(f"Invalid string value for boolean conversion: {value}")
|  | ||||
|  | ||||
@contextmanager
def temp_environ(updates: dict[str, str]):
    """
    Apply ``updates`` to ``os.environ`` for the duration of the ``with`` block.

    On exit every touched variable is put back to its previous value, and
    variables that did not exist beforehand are removed again.

    Args:
        updates: Dict of environment variables to set.
    """
    # Only variables that already exist need their old value remembered;
    # everything else is simply deleted on the way out.
    previous = {name: os.environ[name] for name in updates if name in os.environ}
    try:
        os.environ.update(updates)
        yield
    finally:
        for name in updates:
            if name in previous:
                os.environ[name] = previous[name]
            else:
                os.environ.pop(name, None)
|  | ||||
|  | ||||
@contextmanager
def working_directory(path: str):
    """
    Run the ``with`` block with ``path`` as the current working directory.

    The previous working directory is restored on exit. A falsy ``path``
    makes this a no-op context.
    """
    if path:
        original_dir = os.getcwd()
        try:
            os.chdir(path)
            yield
        finally:
            os.chdir(original_dir)
    else:
        # Nothing to change; just run the block where we are.
        yield
|  | ||||
|  | ||||
def get_wheels(
    output_dir: Path,
    max_depth: Optional[int] = None,
) -> list[dict[str, str]]:
    """
    Collect the ``.whl`` files found under ``output_dir``.

    Args:
        output_dir: Directory to scan.
        max_depth: Deepest directory level (relative to ``output_dir``, which
            is level 0) whose files are considered. ``None`` scans the whole
            tree.

    Returns:
        One dict per wheel with the keys ``"pkg"`` (the file name up to the
        first ``-``) and ``"relpath"`` (path relative to ``output_dir``).
        Directories are visited top-down in sorted order and files are sorted
        within each directory. An empty list is returned when ``output_dir``
        does not exist.
    """
    root = Path(output_dir)
    if not root.exists():
        return []

    items: list[dict[str, str]] = []
    for dirpath, dirnames, filenames in os.walk(root):
        current = Path(dirpath)
        depth = len(current.relative_to(root).parts)
        if max_depth is not None:
            if depth >= max_depth:
                # Nothing below this level can qualify, so stop os.walk from
                # descending instead of walking and discarding the subtree.
                dirnames.clear()
            if depth > max_depth:
                continue
        # Sorting in place makes the traversal (and thus the result order)
        # deterministic across file systems.
        dirnames.sort()
        for fname in sorted(filenames):
            if fname.endswith(".whl"):
                items.append(
                    {
                        "pkg": fname.split("-")[0],
                        "relpath": str((current / fname).relative_to(root)),
                    }
                )
    return items
| @ -1,256 +0,0 @@ | ||||
| import logging | ||||
| import os | ||||
| import textwrap | ||||
| from typing import Any | ||||
|  | ||||
| from cli.lib.common.gh_summary import write_gh_step_summary | ||||
| from cli.lib.common.git_helper import clone_external_repo | ||||
| from cli.lib.common.pip_helper import pip_install_packages | ||||
| from cli.lib.common.utils import run_command, temp_environ, working_directory | ||||
| from jinja2 import Template | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
# Jinja2 template for the markdown header of the vllm CI summary. It is
# rendered by summarize_build_info with `vllm_commit` and `torch_sha`; the
# Pytorch commit line is emitted only when `torch_sha` is truthy.
_TPL_VLLM_INFO = Template(
    textwrap.dedent("""\
    ##  Vllm against Pytorch CI Test Summary
    **Vllm Commit**: [{{ vllm_commit }}](https://github.com/vllm-project/vllm/commit/{{ vllm_commit }})
    {%- if torch_sha %}
    **Pytorch Commit**: [{{ torch_sha }}](https://github.com/pytorch/pytorch/commit/{{ torch_sha }})
    {%- endif %}
""")
)
|  | ||||
|  | ||||
def sample_vllm_test_library():
    """
    Return a hard-coded sample of vllm test plans.

    This exists to unblock vllm CI development and mimics
    https://github.com/vllm-project/vllm/blob/main/.buildkite/test-pipeline.yaml
    (see run_test_plan for how a plan is executed).

    The result maps a test plan name to a dict with:
        - "title": human readable name ("%N" is replaced with the shard info
          by run_test_plan for parallel plans)
        - "id": identifier of the plan
        - "steps": shell commands, run in order
        - "env_vars" (optional): environment variables set while running
        - "package_install" (optional): packages installed before running
        - "parallelism" (optional): number of shards the plan expects
        - "num_gpus" (optional): NOTE(review): not read by any code visible
          here; presumably consumed by the CI configuration - confirm.
    """
    # TODO(elainewy): Read from yaml file to handle the env and tests for vllm
    return {
        "vllm_basic_correctness_test": {
            "title": "Basic Correctness Test",
            "id": "vllm_basic_correctness_test",
            "env_vars": {
                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
            },
            "steps": [
                "pytest -v -s basic_correctness/test_cumem.py",
                "pytest -v -s basic_correctness/test_basic_correctness.py",
                "pytest -v -s basic_correctness/test_cpu_offload.py",
                "VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py",
            ],
        },
        "vllm_basic_models_test": {
            "title": "Basic models test",
            "id": "vllm_basic_models_test",
            "steps": [
                "pytest -v -s models/test_transformers.py",
                "pytest -v -s models/test_registry.py",
                "pytest -v -s models/test_utils.py",
                "pytest -v -s models/test_vision.py",
                "pytest -v -s models/test_initialization.py",
            ],
        },
        "vllm_entrypoints_test": {
            "title": "Entrypoints Test ",
            "id": "vllm_entrypoints_test",
            "env_vars": {
                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
            },
            "steps": [
                " ".join(
                    [
                        "pytest",
                        "-v",
                        "-s",
                        "entrypoints/llm",
                        "--ignore=entrypoints/llm/test_lazy_outlines.py",
                        "--ignore=entrypoints/llm/test_generate.py",
                        "--ignore=entrypoints/llm/test_generate_multiple_loras.py",
                        "--ignore=entrypoints/llm/test_collective_rpc.py",
                    ]
                ),
                "pytest -v -s entrypoints/llm/test_lazy_outlines.py",
                "pytest -v -s entrypoints/llm/test_generate.py ",
                "pytest -v -s entrypoints/llm/test_generate_multiple_loras.py",
                "VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode",
            ],
        },
        "vllm_regression_test": {
            "title": "Regression Test",
            "id": "vllm_regression_test",
            "package_install": ["modelscope"],
            "steps": [
                "pytest -v -s test_regression.py",
            ],
        },
        "vllm_lora_tp_test_distributed": {
            "title": "LoRA TP Test (Distributed)",
            "id": "vllm_lora_tp_test_distributed",
            "env_vars": {
                "VLLM_WORKER_MULTIPROC_METHOD": "spawn",
            },
            "num_gpus": 4,
            "steps": [
                "pytest -v -s -x lora/test_chatglm3_tp.py",
                "echo $VLLM_WORKER_MULTIPROC_METHOD",
                "pytest -v -s -x lora/test_llama_tp.py",
                "pytest -v -s -x lora/test_multi_loras_with_tp.py",
            ],
        },
        "vllm_lora_280_failure_test": {
            "title": "LoRA 280 failure test",
            "id": "vllm_lora_280_failure_test",
            "steps": ["pytest -v lora/test_quant_model.py"],
        },
        "vllm_multi_model_processor_test": {
            "title": "Multi-Modal Processor Test",
            "id": "vllm_multi_model_processor_test",
            "package_install": ["git+https://github.com/TIGER-AI-Lab/Mantis.git"],
            "steps": [
                "pytest -v -s models/multimodal/processing --ignore models/multimodal/processing/test_tensor_schema.py",
            ],
        },
        "vllm_pytorch_compilation_unit_tests": {
            "title": "PyTorch Compilation Unit Tests",
            "id": "vllm_pytorch_compilation_unit_tests",
            "steps": [
                "pytest -v -s compile/test_pass_manager.py",
                "pytest -v -s compile/test_fusion.py",
                "pytest -v -s compile/test_fusion_attn.py",
                "pytest -v -s compile/test_silu_mul_quant_fusion.py",
                "pytest -v -s compile/test_sequence_parallelism.py",
                "pytest -v -s compile/test_async_tp.py",
                "pytest -v -s compile/test_fusion_all_reduce.py",
                "pytest -v -s compile/test_decorator.py",
            ],
        },
        # TODO(elainewy):need to add g6 with 4 gpus to run this test
        # NOTE(review): unlike every other entry, "id" below differs from the
        # dict key ("lora_test" vs "vllm_lora_test") - confirm this is intended.
        "vllm_lora_test": {
            "title": "LoRA Test %N",
            "id": "lora_test",
            "parallelism": 4,
            "steps": [
                "echo '[checking] list sharded lora tests:'",
                # The $$BUILDKITE_PARALLEL_* placeholders are substituted by
                # replace_buildkite_placeholders before the step is run.
                " ".join(
                    [
                        "pytest -q --collect-only lora",
                        "--shard-id=$$BUILDKITE_PARALLEL_JOB",
                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
                    ]
                ),
                "echo '[checking] Done. list lora tests'",
                " ".join(
                    [
                        "pytest -v -s lora --shard-id=$$BUILDKITE_PARALLEL_JOB",
                        "--num-shards=$$BUILDKITE_PARALLEL_JOB_COUNT",
                        "--ignore=lora/test_chatglm3_tp.py --ignore=lora/test_llama_tp.py",
                    ]
                ),
            ],
        },
    }
|  | ||||
|  | ||||
def check_parallelism(tests: Any, title: str, shard_id: int = 0, num_shards: int = 0):
    """
    Decide whether a test plan is sharded and validate the shard arguments.

    Args:
        tests: Test plan entry; its optional ``"parallelism"`` value is the
            number of shards the plan expects.
        title: Plan title, used only in error messages.
        shard_id: Zero-based index of the shard being run. It is substituted
            for ``$$BUILDKITE_PARALLEL_JOB``, which starts at 0.
        num_shards: Total number of shards the caller is running.

    Returns:
        ``False`` when the plan is not parallel (``parallelism`` <= 1),
        ``True`` when it is parallel and the shard arguments are valid.

    Raises:
        RuntimeError: If ``num_shards`` differs from the plan's parallelism,
            or ``shard_id`` is outside ``[0, num_shards)``.
    """
    parallelism = int(tests.get("parallelism", "0"))
    if parallelism <= 1:
        return False

    # Validate the shard count first so that the range check below runs
    # against a count that is known to be correct.
    if num_shards != parallelism:
        raise RuntimeError(
            f"Test {title} expects {parallelism} shards, but invalid {num_shards} is provided"
        )

    # Shard ids are zero-based, so shard_id == num_shards is already out of
    # range (the previous `shard_id > num_shards` check let it through, as
    # well as negative ids).
    if not 0 <= shard_id < num_shards:
        raise RuntimeError(
            f"Test {title} expects a shard id in [0, {num_shards}), "
            f"but invalid {shard_id} is provided"
        )

    return True
|  | ||||
|  | ||||
def run_test_plan(
    test_plan: str,
    test_target: str,
    tests_map: dict[str, Any],
    shard_id: int = 0,
    num_shards: int = 0,
):
    """
    Execute every step of the plan named ``test_plan``.

    Args:
        test_plan: Key of the plan inside ``tests_map``.
        test_target: Name of the target, used for logging only.
        tests_map: Mapping from plan name to plan definition.
        shard_id: Shard index, used when the plan declares parallelism.
        num_shards: Total shard count, used when the plan declares parallelism.

    All steps are run even if an earlier one fails; failures are collected
    and reported together at the end.

    Raises:
        RuntimeError: If the plan is unknown, the shard arguments are invalid,
            or at least one step exits with a non-zero status.
    """
    logger.info("run %s tests.....", test_target)
    if test_plan not in tests_map:
        raise RuntimeError(
            f"test {test_plan} not found, please add it to test plan pool"
        )
    plan = tests_map[test_plan]
    extra_packages = plan.get("package_install", [])
    title = plan.get("title", "unknown test")

    sharded = check_parallelism(plan, title, shard_id, num_shards)
    if sharded:
        title = title.replace("%N", f"{shard_id}/{num_shards}")

    logger.info("Running tests: %s", title)
    if extra_packages:
        logger.info("Installing packages: %s", extra_packages)
        pip_install_packages(packages=extra_packages, prefer_uv=True)

    # Steps run from the plan's working directory with the plan's
    # environment variables applied; both are undone afterwards.
    with working_directory(plan.get("working_directory", "tests")):
        with temp_environ(plan.get("env_vars", {})):
            failed_steps = []
            for step in plan["steps"]:
                logger.info("Running step: %s", step)
                if sharded:
                    step = replace_buildkite_placeholders(step, shard_id, num_shards)
                    logger.info("Running parallel step: %s", step)
                exit_code = run_command(cmd=step, check=False, use_shell=True)
                if exit_code != 0:
                    failed_steps.append(step)
                logger.info("Finish running step: %s", step)
            if failed_steps:
                logger.error("Failed tests: %s", failed_steps)
                raise RuntimeError(
                    f"{len(failed_steps)} pytest runs failed: {failed_steps}"
                )
            logger.info("Done. All tests passed")
|  | ||||
|  | ||||
def clone_vllm(dst: str = "vllm"):
    """
    Clone the vllm repository (with submodules) into ``dst``.

    Returns the second element of what ``clone_external_repo`` yields
    (presumably the checked-out commit - confirm against that helper).
    """
    _repo, vllm_commit = clone_external_repo(
        target="vllm",
        repo="https://github.com/vllm-project/vllm.git",
        dst=dst,
        update_submodules=True,
    )
    return vllm_commit
|  | ||||
|  | ||||
def replace_buildkite_placeholders(step: str, shard_id: int, num_shards: int) -> str:
    """
    Substitute the Buildkite sharding placeholders in ``step``.

    ``$$BUILDKITE_PARALLEL_JOB_COUNT`` becomes ``num_shards`` and
    ``$$BUILDKITE_PARALLEL_JOB`` becomes ``shard_id``.
    """
    # The longer placeholder goes first: the shorter one is a prefix of it
    # and would otherwise clobber it.
    substitutions = (
        ("$$BUILDKITE_PARALLEL_JOB_COUNT", str(num_shards)),
        ("$$BUILDKITE_PARALLEL_JOB", str(shard_id)),
    )
    for placeholder, replacement in substitutions:
        step = step.replace(placeholder, replacement)
    return step
|  | ||||
|  | ||||
def summarize_build_info(vllm_commit: str) -> bool:
    """
    Render the vllm build header and pass it to ``write_gh_step_summary``.

    The Pytorch commit is read from the ``GITHUB_SHA`` environment variable.
    Returns whatever ``write_gh_step_summary`` returns.
    """
    rendered = _TPL_VLLM_INFO.render(
        vllm_commit=vllm_commit,
        torch_sha=os.getenv("GITHUB_SHA"),
    )
    # Normalise to exactly one trailing newline.
    return write_gh_step_summary(f"{rendered.strip()}\n")
| @ -1,285 +0,0 @@ | ||||
| import logging | ||||
| import os | ||||
| import textwrap | ||||
| from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
| from typing import Optional | ||||
|  | ||||
| from cli.lib.common.cli_helper import BaseRunner | ||||
| from cli.lib.common.docker_helper import local_image_exists | ||||
| from cli.lib.common.envs_helper import ( | ||||
|     env_bool_field, | ||||
|     env_path_field, | ||||
|     env_str_field, | ||||
|     with_params_help, | ||||
| ) | ||||
| from cli.lib.common.gh_summary import ( | ||||
|     gh_summary_path, | ||||
|     summarize_content_from_file, | ||||
|     summarize_wheels, | ||||
| ) | ||||
| from cli.lib.common.path_helper import ( | ||||
|     copy, | ||||
|     ensure_dir_exists, | ||||
|     force_create_dir, | ||||
|     get_path, | ||||
|     is_path_exist, | ||||
| ) | ||||
| from cli.lib.common.utils import run_command | ||||
| from cli.lib.core.vllm.lib import clone_vllm, summarize_build_info | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
| # Default path for docker build artifacts | ||||
| _DEFAULT_RESULT_PATH = "./shared" | ||||
|  | ||||
| # Temp folder inside the vllm workspace that the local torch wheels are copied into for the docker build | ||||
| _VLLM_TEMP_FOLDER = "tmp" | ||||
|  | ||||
|  | ||||
@dataclass
class VllmBuildParameters:
    """
    Parameters defining the vllm external input configurations.
    Combine with VllmDockerBuildArgs to define the vllm build environment.

    Every field is populated through an ``env_*_field`` helper using the
    environment variable named in its first argument. ``__post_init__``
    validates that the resource behind each enabled ``use_*`` flag exists.
    """

    # USE_TORCH_WHEEL: when true, use local Torch wheels; requires TORCH_WHEELS_PATH.
    # Otherwise the docker build pulls torch nightly during the build.
    # TORCH_WHEELS_PATH: directory containing local torch wheels when use_torch_whl is True
    use_torch_whl: bool = env_bool_field("USE_TORCH_WHEEL", True)
    torch_whls_path: Path = env_path_field("TORCH_WHEELS_PATH", "./dist")

    # USE_LOCAL_BASE_IMAGE: when true, use an existing local Docker base image; requires BASE_IMAGE.
    # Otherwise, pull the dockerfile's default image remotely.
    # BASE_IMAGE: name:tag (only needed when use_local_base_image is True)
    use_local_base_image: bool = env_bool_field("USE_LOCAL_BASE_IMAGE", True)
    base_image: str = env_str_field("BASE_IMAGE")

    # USE_LOCAL_DOCKERFILE: when true ("1"), use a local Dockerfile; requires DOCKERFILE_PATH.
    # Otherwise, use vllm's default dockerfile.torch_nightly for the build.
    # DOCKERFILE_PATH: path to the Dockerfile used when use_local_dockerfile is True
    use_local_dockerfile: bool = env_bool_field("USE_LOCAL_DOCKERFILE", True)
    dockerfile_path: Path = env_path_field(
        "DOCKERFILE_PATH", ".github/ci_configs/vllm/Dockerfile.tmp_vllm"
    )

    # OUTPUT_DIR: where docker buildx (local exporter) will write artifacts
    output_dir: Path = env_path_field("OUTPUT_DIR", "external/vllm")

    # --- Build args ----------------------------------------------------------
    target_stage: str = env_str_field("TARGET_STAGE", "export-wheels")

    tag_name: str = env_str_field("TAG", "vllm-wheels")

    cuda_version: str = env_str_field("CUDA_VERSION", "12.8.1")

    python_version: str = env_str_field("PYTHON_VERSION", "3.12")

    max_jobs: str = env_str_field("MAX_JOBS", "64")

    sccache_bucket: str = env_str_field("SCCACHE_BUCKET")

    sccache_region: str = env_str_field("SCCACHE_REGION")

    torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9")

    def __post_init__(self):
        """
        Validate the resources required by the enabled ``use_*`` flags.

        Raises:
            ValueError: If an enabled flag's resource is empty or fails its
                existence check, or if ``output_dir`` is empty.
        """
        # (flag attribute, resource attribute, existence check, error message)
        checks = [
            (
                "use_torch_whl",
                "torch_whls_path",
                is_path_exist,
                "TORCH_WHEELS_PATH is not provided, but USE_TORCH_WHEEL is set to 1",
            ),
            (
                "use_local_base_image",
                "base_image",
                local_image_exists,
                f"BASE_IMAGE {self.base_image} does not found, but USE_LOCAL_BASE_IMAGE is set to 1",
            ),
            (
                "use_local_dockerfile",
                "dockerfile_path",
                is_path_exist,
                " DOCKERFILE_PATH path does not found, but USE_LOCAL_DOCKERFILE is set to 1",
            ),
        ]
        for flag_name, attr_name, check_func, error_msg in checks:
            if not getattr(self, flag_name):
                # Log which flag is off; the previous code logged the flag's
                # value, which was always just "False".
                logger.info("flag  %s is not set", flag_name)
                continue
            value = getattr(self, attr_name)
            if not value or not check_func(value):
                raise ValueError(error_msg)
        if not self.output_dir:
            raise ValueError("missing required output_dir")
|  | ||||
|  | ||||
@with_params_help(VllmBuildParameters)
class VllmBuildRunner(BaseRunner):
    """
    Build vLLM using docker buildx.

    Environment variable options:
        "USE_TORCH_WHEEL":      "1: use local wheels; 0: pull nightly from pypi",
        "TORCH_WHEELS_PATH":    "Path to local wheels (when USE_TORCH_WHEEL=1)",

        "USE_LOCAL_BASE_IMAGE": "1: use local base image; 0: default image",
         "BASE_IMAGE":           "name:tag to indicate base image the dockerfile depends on (when USE_LOCAL_BASE_IMAGE=1)",

        "USE_LOCAL_DOCKERFILE": "1: use local Dockerfile; 0: vllm repo default dockerfile.torch_nightly",
        "DOCKERFILE_PATH":      "Path to Dockerfile (when USE_LOCAL_DOCKERFILE=1)",

        "OUTPUT_DIR":           "e.g. './shared'",

        "TORCH_CUDA_ARCH_LIST": "e.g. '8.0' or '8.0;9.0'",
        "CUDA_VERSION":         "e.g. '12.8.1'",
        "PYTHON_VERSION":       "e.g. '3.12'",
        "MAX_JOBS":             "e.g. '64'",
        "SCCACHE_BUCKET":       "e.g. 'my-bucket'",
        "SCCACHE_REGION":       "e.g. 'us-west-2'",
    """

    def __init__(self, args=None):
        # Directory the vllm repository is cloned into and built from.
        self.work_directory = "vllm"

    def run(self):
        """
        main function to run vllm build
        1. prepare vllm build environment
        2. prepare the docker build command args
        3. run docker build

        The GitHub summary is generated even when the docker build fails.
        """
        inputs = VllmBuildParameters()
        logger.info("Running vllm build with inputs: %s", inputs)
        vllm_commit = clone_vllm()

        self.cp_dockerfile_if_exist(inputs)
        # cp torch wheels from the root directory to the vllm workspace if they exist
        self.cp_torch_whls_if_exist(inputs)

        # make sure the output dir to store the build artifacts exists
        ensure_dir_exists(Path(inputs.output_dir))

        cmd = self._generate_docker_build_cmd(inputs)
        logger.info("Running docker build: \n %s", cmd)

        try:
            # Build from the vllm checkout; use the attribute rather than a
            # second hard-coded "vllm" so the two cannot drift apart.
            run_command(cmd, cwd=self.work_directory, env=os.environ.copy())
        finally:
            self.genearte_vllm_build_summary(vllm_commit, inputs)

    def genearte_vllm_build_summary(
        self, vllm_commit: str, inputs: VllmBuildParameters
    ):
        """
        Write the build info, the pip package summary and the wheel lists to
        the GitHub summary. Does nothing when ``gh_summary_path()`` is falsy.
        """
        if not gh_summary_path():
            logger.info("Skipping, not detect GH Summary env var....")
            return None
        logger.info("Generate GH Summary ...")
        # summarize vllm build info
        summarize_build_info(vllm_commit)

        # summarize vllm build artifacts
        vllm_artifact_dir = inputs.output_dir / "wheels"
        summarize_content_from_file(
            vllm_artifact_dir,
            "build_summary.txt",
            title="Vllm build env pip package summary",
        )
        summarize_wheels(
            inputs.torch_whls_path, max_depth=3, title="Torch Wheels Artifacts"
        )
        summarize_wheels(vllm_artifact_dir, max_depth=3, title="Vllm Wheels Artifacts")
        return None

    def cp_torch_whls_if_exist(self, inputs: VllmBuildParameters) -> str:
        """
        Copy the local torch wheels into a fresh temp folder inside the vllm
        workspace so they are part of the docker build context.

        Returns:
            The temp folder path, or ``""`` when ``use_torch_whl`` is off.
        """
        if not inputs.use_torch_whl:
            return ""
        tmp_dir = f"./{self.work_directory}/{_VLLM_TEMP_FOLDER}"
        tmp_path = Path(tmp_dir)
        force_create_dir(tmp_path)
        copy(inputs.torch_whls_path, tmp_dir)
        return tmp_dir

    def cp_dockerfile_if_exist(self, inputs: VllmBuildParameters):
        """
        Overwrite ``docker/Dockerfile.nightly_torch`` in the vllm workspace
        with the local Dockerfile when ``use_local_dockerfile`` is on.
        """
        if not inputs.use_local_dockerfile:
            logger.info("using vllm default dockerfile.torch_nightly for build")
            return
        dockerfile_path = get_path(inputs.dockerfile_path, resolve=True)
        vllm_torch_dockerfile = Path(
            f"./{self.work_directory}/docker/Dockerfile.nightly_torch"
        )
        copy(dockerfile_path, vllm_torch_dockerfile)

    def get_result_path(self, path):
        """
        Get the absolute path of the result path; a falsy ``path`` falls back
        to ``_DEFAULT_RESULT_PATH``.
        """
        if not path:
            path = _DEFAULT_RESULT_PATH
        abs_path = get_path(path, resolve=True)
        return abs_path

    def _get_torch_wheel_path_arg(self, torch_whl_dir: Optional[Path]) -> str:
        """
        Return the ``TORCH_WHEELS_PATH`` build arg, or ``""`` when no wheel
        directory is given. The arg always points at the temp folder relative
        to the docker build context, not at ``torch_whl_dir`` itself.
        """
        if not torch_whl_dir:
            return ""
        return f"--build-arg TORCH_WHEELS_PATH={_VLLM_TEMP_FOLDER}"

    def _get_base_image_args(self, inputs: VllmBuildParameters) -> tuple[str, str, str]:
        """
        Returns:
            - base_image_arg: docker buildx arg string for base image
            - final_base_image_arg:  docker buildx arg string for vllm-base stage
            - pull_flag: ``--pull=false`` when the image exists locally,
              otherwise an empty string

            All three are empty strings when ``use_local_base_image`` is off.
        """
        if not inputs.use_local_base_image:
            return "", "", ""

        base_image = inputs.base_image

        # set both base image and final base image to the same local image
        base_image_arg = f"--build-arg BUILD_BASE_IMAGE={base_image}"
        final_base_image_arg = f"--build-arg FINAL_BASE_IMAGE={base_image}"

        if local_image_exists(base_image):
            pull_flag = "--pull=false"
            return base_image_arg, final_base_image_arg, pull_flag
        # Pass the name itself; the previous `{base_image}` was a set literal
        # and rendered as "{'name:tag'}" in the log.
        logger.info(
            "[INFO] Local image not found:%s will try to pull from remote", base_image
        )
        return base_image_arg, final_base_image_arg, ""

    def _generate_docker_build_cmd(
        self,
        inputs: VllmBuildParameters,
    ) -> str:
        """
        Assemble the ``docker buildx build`` command line for ``inputs``.

        The backslash-newline pairs inside the f-string are consumed by the
        string literal, so the result is a single logical line.
        """
        base_image_arg, final_base_image_arg, pull_flag = self._get_base_image_args(
            inputs
        )
        # Only point the build at the copied wheels when they were actually
        # copied: torch_whls_path has a non-empty default, so checking the
        # path alone would emit the build arg even with USE_TORCH_WHEEL=0.
        torch_arg = self._get_torch_wheel_path_arg(
            inputs.torch_whls_path if inputs.use_torch_whl else None
        )

        return textwrap.dedent(
            f"""
            docker buildx build \
                --output type=local,dest={inputs.output_dir} \
                -f docker/Dockerfile.nightly_torch \
                {pull_flag} \
                {torch_arg} \
                {base_image_arg} \
                {final_base_image_arg} \
                --build-arg max_jobs={inputs.max_jobs} \
                --build-arg CUDA_VERSION={inputs.cuda_version} \
                --build-arg PYTHON_VERSION={inputs.python_version} \
                --build-arg USE_SCCACHE={int(bool(inputs.sccache_bucket and inputs.sccache_region))} \
                --build-arg SCCACHE_BUCKET_NAME={inputs.sccache_bucket} \
                --build-arg SCCACHE_REGION_NAME={inputs.sccache_region} \
                --build-arg torch_cuda_arch_list='{inputs.torch_cuda_arch_list}' \
                --target {inputs.target_stage} \
                -t {inputs.tag_name} \
                --progress=plain .
        """
        ).strip()
| @ -1,263 +0,0 @@ | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| import subprocess | ||||
| import sys | ||||
| from collections.abc import Iterable | ||||
| from dataclasses import dataclass | ||||
| from enum import Enum | ||||
| from pathlib import Path | ||||
| from typing import Any | ||||
|  | ||||
| from cli.lib.common.cli_helper import BaseRunner | ||||
| from cli.lib.common.envs_helper import env_path_field, env_str_field, get_env | ||||
| from cli.lib.common.path_helper import copy, remove_dir | ||||
| from cli.lib.common.pip_helper import ( | ||||
|     pip_install_first_match, | ||||
|     pip_install_packages, | ||||
|     pkg_exists, | ||||
|     run_python, | ||||
| ) | ||||
| from cli.lib.common.utils import run_command, working_directory | ||||
| from cli.lib.core.vllm.lib import clone_vllm, run_test_plan, sample_vllm_test_library | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
@dataclass
class VllmTestParameters:
    """
    Parameters defining the vllm external test input

    Every field is declared through an env_*_field helper that receives an
    environment variable name and a fallback value (presumably the variable
    is read when the dataclass is instantiated -- confirm in envs_helper).

    !!!DO NOT ADD SECRETS IN THIS CLASS!!!
    you can put environment variable name in VllmTestParameters if it's not the same as the secret one
    fetch secrets directly from env variables during runtime

    Raises:
        ValueError: from __post_init__ when torch_whls_path or vllm_whls_path
            does not exist.
    """

    # env var WHEELS_PATH, fallback "./dist"
    torch_whls_path: Path = env_path_field("WHEELS_PATH", "./dist")

    # env var VLLM_WHEELS_PATH, fallback "./dist/external/vllm/wheels"
    vllm_whls_path: Path = env_path_field(
        "VLLM_WHEELS_PATH", "./dist/external/vllm/wheels"
    )

    # env var TORCH_CUDA_ARCH_LIST, fallback "8.9"
    torch_cuda_arch_list: str = env_str_field("TORCH_CUDA_ARCH_LIST", "8.9")

    def __post_init__(self):
        # fail fast: both wheel locations must already exist before tests start
        if not self.torch_whls_path.exists():
            raise ValueError("missing torch_whls_path")
        if not self.vllm_whls_path.exists():
            raise ValueError("missing vllm_whls_path")
|  | ||||
|  | ||||
class TestInpuType(Enum):
    """Kind of test input that VllmTestRunner was asked to run."""

    # a named test plan was supplied on the command line
    TEST_PLAN = "test_plan"
    # no recognised input was supplied; VllmTestRunner.run() rejects this value
    UNKNOWN = "unknown"
|  | ||||
|  | ||||
class VllmTestRunner(BaseRunner):
    # Runner that prepares a vllm checkout (wheels, dependencies, environment)
    # and then executes a vllm test plan, optionally sharded.
    # NOTE: documented with comments rather than a class docstring so that any
    # CLI description derived from the runner's docstring stays unchanged.

    def __init__(self, args: Any):
        self.work_directory = "vllm"

        self.shard_id = args.shard_id
        self.num_shards = args.num_shards

        # a test plan is the only supported input; without one the type stays UNKNOWN
        has_plan = bool(args.test_plan)
        self.test_plan = args.test_plan if has_plan else ""
        self.test_type = TestInpuType.TEST_PLAN if has_plan else TestInpuType.UNKNOWN

        # Glob patterns matching the layout of the artifacts.zip from the torch build
        self.TORCH_WHL_PATH_REGEX = "torch*.whl"
        self.TORCH_WHL_EXTRA = "opt-einsum"
        self.TORCH_ADDITIONAL_WHLS_REGEX = [
            "vision/torchvision*.whl",
            "audio/torchaudio*.whl",
        ]

        # Glob patterns matching the layout of the artifacts.zip from the vllm external build
        self.VLLM_TEST_WHLS_REGEX = [
            "xformers/*.whl",
            "vllm/vllm*.whl",
            "flashinfer-python/flashinfer*.whl",
        ]

    def prepare(self):
        """
        Set up the vllm test environment: export env vars, clone the vllm
        repo, install all wheels and the test dependencies.
        """
        test_params = VllmTestParameters()
        logger.info("Display VllmTestParameters %s", test_params)
        self._set_envs(test_params)

        clone_vllm(dst=self.work_directory)
        with working_directory(self.work_directory):
            # drop the "vllm" directory inside the checkout before installing wheels
            remove_dir(Path("vllm"))
            self._install_wheels(test_params)
            self._install_dependencies()
        # verify the torches are not overridden by test dependencies
        check_versions()

    def run(self):
        """
        Entry point: prepare the environment, then run the selected test plan.

        Raises ValueError when no test plan was selected.
        """
        self.prepare()
        with working_directory(self.work_directory):
            if self.test_type != TestInpuType.TEST_PLAN:
                raise ValueError(f"Unknown test type {self.test_type}")

            # shard arguments are only forwarded when there is more than one shard
            shard_args = (
                (self.shard_id, self.num_shards) if self.num_shards > 1 else ()
            )
            run_test_plan(
                self.test_plan, "vllm", sample_vllm_test_library(), *shard_args
            )

    def _install_wheels(self, params: VllmTestParameters):
        """Install torch (if absent), its companion wheels and the vllm wheels."""
        logger.info("Running vllm test with inputs: %s", params)
        torch_dir = str(params.torch_whls_path)
        if not pkg_exists("torch"):
            # install torch from local whls if it's not installed yet.
            pip_install_first_match(
                f"{torch_dir}/{self.TORCH_WHL_PATH_REGEX}", self.TORCH_WHL_EXTRA
            )

        for wheel_glob in self.TORCH_ADDITIONAL_WHLS_REGEX:
            pip_install_first_match(f"{torch_dir}/{wheel_glob}")
        logger.info("Done. Installed torch and other torch-related wheels ")

        logger.info("Installing vllm wheels")
        vllm_dir = str(params.vllm_whls_path)
        for wheel_glob in self.VLLM_TEST_WHLS_REGEX:
            pip_install_first_match(f"{vllm_dir}/{wheel_glob}")
        logger.info("Done. Installed vllm wheels")

    def _install_test_dependencies(self):
        """
        Rewrite requirements/test.in from the vllm repo so that it refers to
        the locally installed torch wheels, compile it into test.txt at
        runtime and install the result.
        """
        logger.info("generate test.txt from requirements/test.in with local torch whls")
        preprocess_test_in()
        # the checked-in test.txt is used as a constraint file for the compile step
        copy("requirements/test.txt", "snapshot_constraint.txt")

        compile_cmd = " ".join(
            [
                f"{sys.executable} -m uv pip compile requirements/test.in",
                "-o test.txt",
                "--index-strategy unsafe-best-match",
                "--constraint snapshot_constraint.txt",
                "--torch-backend cu128",
            ]
        )
        run_command(compile_cmd)
        pip_install_packages(requirements="test.txt", prefer_uv=True)
        logger.info("Done. installed requirements for test dependencies")

    def _install_dependencies(self):
        """Install vllm test utilities, common/build requirements and test requirements."""
        pip_install_packages(packages=["-e", "tests/vllm_test_utils"], prefer_uv=True)
        pip_install_packages(packages=["hf_transfer"], prefer_uv=True)
        os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

        # using script from vllm repo to remove all torch packages from requirements txt
        run_python("use_existing_torch.py")

        # install common packages
        for requirements_file in ("requirements/common.txt", "requirements/build.txt"):
            pip_install_packages(requirements=requirements_file, prefer_uv=True)
        # install test packages
        self._install_test_dependencies()

    def _set_envs(self, inputs: VllmTestParameters):
        """
        Export the environment variables the tests rely on.

        Raises ValueError when the Hugging Face token or TORCH_CUDA_ARCH_LIST
        ends up empty.
        """
        os.environ["TORCH_CUDA_ARCH_LIST"] = inputs.torch_cuda_arch_list
        arch_list_supported = validate_cuda(get_env("TORCH_CUDA_ARCH_LIST"))
        if not arch_list_supported:
            # an unsupported arch list only triggers a warning, not a failure
            logger.warning(
                "Missing supported TORCH_CUDA_ARCH_LIST. "
                "Currently support TORCH_CUDA_ARCH_LIST env var "
                "with supported arch [8.0, 8.9, 9.0]"
            )

        # the token is supplied under a different env var name and re-exported as HF_TOKEN
        os.environ["HF_TOKEN"] = os.environ.get("VLLM_TEST_HUGGING_FACE_TOKEN", "")
        if not get_env("HF_TOKEN"):
            raise ValueError(
                "missing required HF_TOKEN, please set VLLM_TEST_HUGGING_FACE_TOKEN env var"
            )
        if not get_env("TORCH_CUDA_ARCH_LIST"):
            raise ValueError(
                "missing required TORCH_CUDA_ARCH_LIST, please set TORCH_CUDA_ARCH_LIST env var"
            )
|  | ||||
|  | ||||
def preprocess_test_in(
    target_file: str = "requirements/test.in", additional_packages: Iterable[str] = ()
):
    """
    Rewrite target_file in place (relative to the vllm work directory).

    Requirement lines that start with torch, torchvision, torchaudio,
    xformers, mamba_ssm or any of additional_packages, followed by "==", "@"
    or ">=", are dropped (case-insensitive). The locally installed
    torch/torchvision/torchaudio entries reported by `pip freeze` in the form
    "$WHEEL_PACKAGE_NAME @ file://<LOCAL_PATH>" are then written at the top of
    the file, followed by a blank line and the remaining requirements.

    Args:
        target_file: path of the requirements file to rewrite.
        additional_packages: extra package names to strip from the file.
    """
    pkgs_to_remove = [
        "torch",
        "torchvision",
        "torchaudio",
        "xformers",
        "mamba_ssm",
        *(additional_packages or ()),
    ]
    # Read current requirements
    target_path = Path(target_file)
    lines = target_path.read_text().splitlines()

    # Remove lines starting with the package names (==, @, >=) — case-insensitive.
    # Names are escaped so regex metacharacters in a package name (e.g. ".")
    # are matched literally instead of being interpreted as a pattern.
    alternatives = "|".join(re.escape(pkg) for pkg in pkgs_to_remove)
    pattern = re.compile(rf"^({alternatives})\s*(==|@|>=)", re.IGNORECASE)
    kept_lines = [line for line in lines if not pattern.match(line)]

    # Get local installed torch/vision/audio from pip freeze
    # This is hacky, but it works
    pip_freeze = subprocess.check_output(["pip", "freeze"], text=True)
    local_wheel = re.compile(
        r"^(torch|torchvision|torchaudio)\s*@\s*file://", re.IGNORECASE
    )
    header_lines = [line for line in pip_freeze.splitlines() if local_wheel.match(line)]

    # Write back: header_lines + blank + kept_lines
    out_lines = header_lines + [""] + kept_lines
    target_path.write_text("\n".join(out_lines) + "\n")
    logger.info("[INFO] Updated %s", target_file)
|  | ||||
|  | ||||
def validate_cuda(value: str) -> bool:
    """
    Check that every arch listed in a TORCH_CUDA_ARCH_LIST value is supported.

    Entries may be separated by whitespace or by ";" (both separators are used
    for TORCH_CUDA_ARCH_LIST), e.g. "8.0 8.9" or "8.0;9.0".

    Args:
        value: the raw TORCH_CUDA_ARCH_LIST string.

    Returns:
        True when every entry is one of 8.0, 8.9 or 9.0. An empty value
        contains no entries and therefore also returns True.
    """
    VALID_VALUES = {"8.0", "8.9", "9.0"}
    # normalise ";" to whitespace so both separator styles split the same way
    archs = value.replace(";", " ").split()
    return all(arch in VALID_VALUES for arch in archs)
|  | ||||
|  | ||||
def check_versions():
    """
    Probe each torch/vllm related package with pkg_exists.

    The return value of pkg_exists is not inspected here; this function only
    triggers the probe for every package, bracketed by two log lines.
    """
    logger.info("Double check installed packages")
    for package_name in ("torch", "xformers", "torchvision", "torchaudio", "vllm"):
        pkg_exists(package_name)
    logger.info("Done. checked installed packages")
| @ -1,40 +0,0 @@ | ||||
| # main.py | ||||
|  | ||||
| import argparse | ||||
| import logging | ||||
|  | ||||
| from cli.build_cli.register_build import register_build_commands | ||||
| from cli.lib.common.logger import setup_logging | ||||
| from cli.test_cli.register_test import register_test_commands | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
|  | ||||
def main():
    """
    CLI entry point: build the argument parser, configure logging and dispatch
    to the handler stored in args.func by the selected sub-command.
    """
    # top-level parser with a mandatory sub-command and a global log level
    parser = argparse.ArgumentParser(description="Lumos CLI")
    subparsers = parser.add_subparsers(dest="command", required=True)
    parser.add_argument(
        "--log-level", default="INFO", help="Log level (DEBUG, INFO, WARNING, ERROR)"
    )

    # each registrar attaches its second-level sub-commands
    for register in (register_build_commands, register_test_commands):
        register(subparsers)

    # parse only after every option has been registered
    args = parser.parse_args()

    # unknown level names fall back to INFO
    level = getattr(logging, args.log_level.upper(), logging.INFO)
    setup_logging(level)
    logger.debug("Parsed args: %s", args)

    if not hasattr(args, "func"):
        parser.print_help()
        return
    args.func(args)
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| @ -1,62 +0,0 @@ | ||||
| import argparse | ||||
| import logging | ||||
|  | ||||
| from cli.lib.common.cli_helper import register_targets, RichHelp, TargetSpec | ||||
| from cli.lib.core.vllm.vllm_test import VllmTestRunner | ||||
|  | ||||
|  | ||||
| logger = logging.getLogger(__name__) | ||||
|  | ||||
# Maps each test target to its runner class and help text.
# Every entry becomes a target under: python -m cli.run test external {target}
_TARGETS: dict[str, TargetSpec] = {
    "vllm": {
        "runner": VllmTestRunner,
        "help": "test vLLM with pytorch main",
    }
    # add yours ...
}
|  | ||||
|  | ||||
def common_args(parser: argparse.ArgumentParser) -> None:
    """
    Add common CLI arguments to the given parser.

    Adds:
        --shard-id: integer id of the shard to run (default 1).
        --num-shards: integer number of shards (default 1).
        -tp/--test-plan: name of a pre-defined test plan; it sits in a
            required mutually exclusive group, so it must be supplied.
    """
    parser.add_argument(
        "--shard-id",
        type=int,
        default=1,
        # the option takes exactly one integer; a comma-separated example
        # such as '0,1,2,3' would be rejected by type=int
        help="id of the single shard to run (an integer), e.g. '1'",
    )
    parser.add_argument(
        "--num-shards",
        type=int,
        default=1,
        help="a number of shards to run, e.g. '4'",
    )
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument(
        "-tp",
        "--test-plan",
        type=str,
        help="a pre-defined test plan to run, e.g. 'basic_correctness_test'",
    )
|  | ||||
|  | ||||
def register_test_commands(subparsers: argparse._SubParsersAction) -> None:
    """
    Attach the "test" command, its "external" sub-command and every target
    listed in _TARGETS to the given sub-parser collection.
    """
    test_parser = subparsers.add_parser(
        "test",
        help="test related commands",
        formatter_class=RichHelp,
    )
    test_subparsers = test_parser.add_subparsers(dest="test_command", required=True)

    # one aligned "name  help" line per target, shown in the description
    target_lines = [
        f"  {name:12} {spec.get('help', '')}" for name, spec in _TARGETS.items()
    ]
    overview = "\n".join(target_lines)

    external_parser = test_subparsers.add_parser(
        "external",
        help="Test external targets",
        description="Test third-party targets.\n\nAvailable targets:\n" + overview,
        formatter_class=RichHelp,
    )
    register_targets(external_parser, _TARGETS, common_args=common_args)
| @ -1,23 +0,0 @@ | ||||
| [project] | ||||
| name = "lumen-ci" | ||||
| version = "0.1.0" | ||||
| dependencies = [ | ||||
|     "pyyaml==6.0.2", | ||||
|     "GitPython==3.1.45", | ||||
|     "docker==7.1.0", | ||||
|     "pytest==7.3.2", | ||||
|     "uv==0.8.6" | ||||
| ] | ||||
|  | ||||
| [tool.setuptools] | ||||
| packages = ["cli"] | ||||
|  | ||||
| [tool.setuptools.package-dir] | ||||
| cli = "cli" | ||||
|  | ||||
| [tool.ruff.lint] | ||||
| # Enable preview mode for linting | ||||
| preview = true | ||||
|  | ||||
| # Now you can select your preview rules, like RUF048 | ||||
| extend-select = ["RUF048"] | ||||
| @ -1,47 +0,0 @@ | ||||
| # tests/test_cli.py | ||||
| import io | ||||
| import sys | ||||
| import unittest | ||||
| from contextlib import redirect_stderr, redirect_stdout | ||||
| from unittest.mock import patch | ||||
|  | ||||
| from cli.run import main | ||||
|  | ||||
|  | ||||
class TestArgparseCLI(unittest.TestCase):
    # End-to-end checks of the argparse wiring exposed by cli.run.main.

    @patch("cli.build_cli.register_build.VllmBuildRunner.run", return_value=None)
    @patch("cli.build_cli.register_build.VllmBuildRunner.__init__", return_value=None)
    def test_cli_run_build_external(self, mock_init, mock_run):
        from cli.run import main  # import after patches if needed

        argv = ["cli.run", "build", "external", "vllm"]
        with patch.object(sys, "argv", argv):
            # argparse may call sys.exit on error; capture to avoid test aborts
            try:
                main()
            except SystemExit:
                pass
        # the runner was constructed once and run() was called without arguments
        mock_init.assert_called_once()
        mock_run.assert_called_once_with()

    def test_build_help(self):
        argv = ["cli.run", "build", "--help"]

        with patch.object(sys, "argv", argv):
            captured_out = io.StringIO()
            captured_err = io.StringIO()

            # --help always raises SystemExit(0)
            with self.assertRaises(SystemExit) as exit_ctx:
                with redirect_stdout(captured_out), redirect_stderr(captured_err):
                    main()

            self.assertEqual(exit_ctx.exception.code, 0)

            help_text = captured_out.getvalue()
            for expected in ("usage", "external"):
                self.assertIn(expected, help_text)
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
| @ -1,115 +0,0 @@ | ||||
| import argparse | ||||
| import io | ||||
| import unittest | ||||
| from contextlib import redirect_stderr | ||||
| from unittest.mock import patch | ||||
|  | ||||
| from cli.lib.common.cli_helper import BaseRunner, register_targets, RichHelp, TargetSpec | ||||
|  | ||||
|  | ||||
| # ---- Dummy runners for unittests---- | ||||
# Dummy runner WITH a docstring; the docstring text is asserted verbatim by
# test_runner_docstring_used_as_description_when_missing, so do not edit it.
class FooRunner(BaseRunner):
    """Foo description from docstring."""

    def run(self) -> None:  # replaced by mock
        pass
|  | ||||
|  | ||||
# Dummy runner deliberately left WITHOUT a docstring (a runner docstring is
# used as the parser description elsewhere in these tests).
class BarRunner(BaseRunner):
    def run(self) -> None:  # replaced by mock
        pass
|  | ||||
|  | ||||
def add_foo_args(p: argparse.ArgumentParser) -> None:
    # target-specific option registered for the "foo" target in these tests
    p.add_argument(
        "--x",
        help="x value",
        required=True,
        type=int,
    )
|  | ||||
|  | ||||
def common_args(p: argparse.ArgumentParser) -> None:
    # option shared by every target registered in these tests
    p.add_argument(
        "--verbose",
        help="verbose flag",
        action="store_true",
    )
|  | ||||
|  | ||||
def build_parser(specs: dict[str, TargetSpec]) -> argparse.ArgumentParser:
    # Build an "app" parser and register the given target specs on it,
    # together with the shared --verbose option.
    app_parser = argparse.ArgumentParser(prog="app", formatter_class=RichHelp)
    register_targets(parser=app_parser, target_specs=specs, common_args=common_args)
    return app_parser
|  | ||||
|  | ||||
def get_subparser(
    parser: argparse.ArgumentParser, name: str
) -> argparse.ArgumentParser:
    # Return the sub-parser registered under `name`, taken from the first
    # sub-parsers action found on `parser` (argparse private attributes).
    group_actions = parser._subparsers._group_actions  # type: ignore[attr-defined]
    subparsers_action = next(
        filter(
            lambda action: isinstance(action, argparse._SubParsersAction),
            group_actions,
        )
    )
    return subparsers_action.choices[name]
|  | ||||
|  | ||||
class TestRegisterTargets(unittest.TestCase):
    # Behavioural checks for register_targets, using the dummy runners above.

    def test_metavar_lists_targets(self):
        parser = build_parser(
            {
                "foo": {"runner": FooRunner, "add_arguments": add_foo_args},
                "bar": {"runner": BarRunner},
            }
        )
        group_actions = parser._subparsers._group_actions  # type: ignore[attr-defined]
        subparsers_action = next(
            action
            for action in group_actions
            if isinstance(action, argparse._SubParsersAction)
        )
        # targets are listed in registration order
        self.assertEqual(subparsers_action.metavar, "{foo,bar}")

    def test_add_arguments_and_common_args_present(self):
        parser = build_parser(
            {"foo": {"runner": FooRunner, "add_arguments": add_foo_args}}
        )
        help_text = get_subparser(parser, "foo").format_help()
        # both the target-specific and the shared option show up
        for option in ("--x", "--verbose"):
            self.assertIn(option, help_text)

    def test_runner_constructed_with_ns_and_run_called(self):
        parser = build_parser(
            {"foo": {"runner": FooRunner, "add_arguments": add_foo_args}}
        )

        with (
            patch.object(FooRunner, "__init__", return_value=None) as mock_init,
            patch.object(FooRunner, "run", return_value=None) as mock_run,
        ):
            namespace = parser.parse_args(["foo", "--x", "3", "--verbose"])
            namespace.func(namespace)  # set by register_targets
            # __init__ received the Namespace as its single positional argument
            self.assertEqual(mock_init.call_count, 1)
            (received,), _ = mock_init.call_args
            self.assertIsInstance(received, argparse.Namespace)
            # run() called with no args
            mock_run.assert_called_once_with()

    def test_runner_docstring_used_as_description_when_missing(self):
        parser = build_parser(
            {"foo": {"runner": FooRunner, "add_arguments": add_foo_args}}
        )
        help_text = get_subparser(parser, "foo").format_help()
        self.assertIn("Foo description from docstring.", help_text)

    def test_missing_target_raises_systemexit_with_usage(self):
        parser = build_parser({"foo": {"runner": FooRunner}})
        captured_err = io.StringIO()
        with self.assertRaises(SystemExit), redirect_stderr(captured_err):
            parser.parse_args([])
        self.assertIn("usage:", captured_err.getvalue())
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
| @ -1,75 +0,0 @@ | ||||
| import unittest | ||||
| from unittest import mock | ||||
| from unittest.mock import MagicMock | ||||
|  | ||||
| import docker.errors as derr | ||||
| from cli.lib.common.docker_helper import _get_client, local_image_exists | ||||
|  | ||||
|  | ||||
class TestDockerImageHelpers(unittest.TestCase):
    # Checks for local_image_exists and the cached client behind _get_client.

    def setUp(self):
        # Start every test with no cached client in the target module
        singleton_patch = mock.patch("cli.lib.common.docker_helper._docker_client", None)
        self.addCleanup(singleton_patch.stop)
        singleton_patch.start()

    def test_local_image_exists_true(self):
        # images.get returning an object (no exception) means the image exists
        client = MagicMock()
        client.images.get.return_value = object()
        self.assertTrue(local_image_exists("repo:tag", client=client))

    def test_local_image_exists_not_found_false(self):
        # docker.errors.NotFound is reported as "does not exist"
        client = MagicMock()
        client.images.get.side_effect = derr.NotFound("nope")
        self.assertFalse(local_image_exists("missing:latest", client=client))

    def test_local_image_exists_api_error_false(self):
        # docker.errors.APIError is also reported as "does not exist"
        client = MagicMock()
        client.images.get.side_effect = derr.APIError("boom", None)
        self.assertFalse(local_image_exists("broken:tag", client=client))

    def test_local_image_exists_uses_lazy_singleton(self):
        # Patch docker.from_env used by _get_client()
        with mock.patch(
            "cli.lib.common.docker_helper.docker.from_env"
        ) as mock_from_env:
            fake_client = MagicMock()
            mock_from_env.return_value = fake_client

            # First call should create and cache the client
            first = _get_client()
            self.assertIs(first, fake_client)
            mock_from_env.assert_called_once()

            # Second call should reuse cached client (no extra from_env calls)
            second = _get_client()
            self.assertIs(second, fake_client)
            mock_from_env.assert_called_once()  # still once

    def test_local_image_exists_without_client_param_calls_get_client_once(self):
        # With _get_client patched, each local_image_exists call asks it for the client
        images = ("repo:tag", "repo:tag2")
        with mock.patch("cli.lib.common.docker_helper._get_client") as mock_get_client:
            fake_client = MagicMock()
            mock_get_client.return_value = fake_client

            for image in images:
                local_image_exists(image)

            # local_image_exists calls _get_client each time; caching of
            # docker.from_env happens inside the real _get_client.
            self.assertEqual(mock_get_client.call_count, 2)
            self.assertEqual(fake_client.images.get.call_count, 2)
            for image in images:
                fake_client.images.get.assert_any_call(image)
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
| @ -1,149 +0,0 @@ | ||||
| import os | ||||
| import unittest | ||||
| from dataclasses import dataclass | ||||
| from pathlib import Path | ||||
| from unittest.mock import patch | ||||
|  | ||||
| import cli.lib.common.envs_helper as m | ||||
|  | ||||
|  | ||||
| class TestEnvHelpers(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         # Keep a copy of the original environment to restore later | ||||
|         self._env_backup = dict(os.environ) | ||||
|  | ||||
|     def tearDown(self): | ||||
|         # Restore environment to original state | ||||
|         os.environ.clear() | ||||
|         os.environ.update(self._env_backup) | ||||
|  | ||||
|     # -------- get_env -------- | ||||
|     def test_get_env_unset_returns_default(self): | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             self.assertEqual(m.get_env("FOO", "default"), "default") | ||||
|  | ||||
|     def test_get_env_empty_returns_default(self): | ||||
|         with patch.dict(os.environ, {"FOO": ""}, clear=True): | ||||
|             self.assertEqual(m.get_env("FOO", "default"), "default") | ||||
|  | ||||
|     def test_get_env_set_returns_value(self): | ||||
|         with patch.dict(os.environ, {"FOO": "bar"}, clear=True): | ||||
|             self.assertEqual(m.get_env("FOO", "default"), "bar") | ||||
|  | ||||
|     def test_get_env_not_exist_returns_default(self): | ||||
|         with patch.dict(os.environ, {"FOO": "bar"}, clear=True): | ||||
|             self.assertEqual(m.get_env("TEST_NOT_EXIST", "default"), "default") | ||||
|  | ||||
|     def test_get_env_not_exist_without_default(self): | ||||
|         with patch.dict(os.environ, {"FOO": "bar"}, clear=True): | ||||
|             self.assertEqual(m.get_env("TEST_NOT_EXIST"), "") | ||||
|  | ||||
|     # -------- env_bool -------- | ||||
|     def test_env_bool_uses_default_when_unset(self): | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             self.assertTrue(m.env_bool("FLAG", default=True)) | ||||
|             self.assertFalse(m.env_bool("FLAG", default=False)) | ||||
|  | ||||
|     def test_env_bool_uses_str2bool_when_set(self): | ||||
|         # Patch str2bool used by env_bool so we don't depend on its exact behavior | ||||
|         def fake_str2bool(s: str) -> bool: | ||||
|             return s.lower() in {"1", "true", "yes", "on", "y"} | ||||
|  | ||||
|         with ( | ||||
|             patch.dict(os.environ, {"FLAG": "yEs"}, clear=True), | ||||
|             patch.object(m, "str2bool", fake_str2bool), | ||||
|         ): | ||||
|             self.assertTrue(m.env_bool("FLAG", default=False)) | ||||
|  | ||||
|     # -------- env_path_optional / env_path -------- | ||||
|     def test_env_path_optional_unset_returns_none_by_default(self): | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             self.assertIsNone(m.env_path_optional("P")) | ||||
|  | ||||
|     def test_env_path_optional_unset_returns_none_when_env_var_is_empty(self): | ||||
|         with patch.dict(os.environ, {"P": ""}, clear=True): | ||||
|             self.assertIsNone(m.env_path_optional("P")) | ||||
|  | ||||
|     def test_env_path_optional_unset_returns_default_str(self): | ||||
|         # default as string; resolve=True by default -> absolute path | ||||
|         default_str = "x/y" | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             p = m.env_path_optional("P", default=default_str) | ||||
|             self.assertIsInstance(p, Path) | ||||
|             self.assertIsNotNone(p) | ||||
|             if p: | ||||
|                 self.assertTrue(p.is_absolute()) | ||||
|                 self.assertEqual(p.parts[-2:], ("x", "y")) | ||||
|  | ||||
|     def test_env_path_optional_unset_returns_default_path_no_resolve(self): | ||||
|         d = Path("z") | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             p = m.env_path_optional("P", default=d, resolve=False) | ||||
|             self.assertEqual(p, d) | ||||
|  | ||||
|     def test_env_path_optional_respects_resolve_true(self): | ||||
|         with patch.dict(os.environ, {"P": "a/b"}, clear=True): | ||||
|             p = m.env_path_optional("P", resolve=True) | ||||
|             self.assertIsInstance(p, Path) | ||||
|             if p: | ||||
|                 self.assertTrue(p.is_absolute()) | ||||
|  | ||||
|     def test_env_path_optional_respects_resolve_false(self): | ||||
|         with patch.dict(os.environ, {"P": "rel/dir"}, clear=True): | ||||
|             p = m.env_path_optional("P", resolve=False) | ||||
|             self.assertEqual(p, Path("rel/dir")) | ||||
|             if p: | ||||
|                 self.assertFalse(p.is_absolute()) | ||||
|  | ||||
|     def test_env_path_raises_when_missing_and_default_none(self): | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             with self.assertRaises(ValueError): | ||||
|                 m.env_path("P", None, resolve=True) | ||||
|  | ||||
|     def test_env_path_returns_path_when_present(self): | ||||
|         tmp = Path("./b").resolve() | ||||
|         with patch.dict(os.environ, {"P": str(tmp)}, clear=True): | ||||
|             p = m.env_path("P", None, resolve=True) | ||||
|             self.assertEqual(p, tmp) | ||||
|  | ||||
|     # -------- dataclass field helpers -------- | ||||
|     def test_dataclass_fields_read_env_at_instantiation(self): | ||||
|         @dataclass | ||||
|         class Cfg: | ||||
|             flag: bool = m.env_bool_field("FLAG", default=False) | ||||
|             out: Path = m.env_path_field("OUT", default="ab", resolve=True) | ||||
|             name: str = m.env_str_field("NAME", default="anon") | ||||
|  | ||||
|         # First instantiation | ||||
|         with patch.dict( | ||||
|             os.environ, {"FLAG": "true", "OUT": "outdir", "NAME": "alice"}, clear=True | ||||
|         ): | ||||
|             cfg1 = Cfg() | ||||
|             self.assertTrue(cfg1.flag) | ||||
|             self.assertIsInstance(cfg1.out, Path) | ||||
|             self.assertTrue(cfg1.out.is_absolute()) | ||||
|             self.assertEqual(cfg1.name, "alice") | ||||
|             cfg1.name = "bob"  # change instance value | ||||
|             self.assertEqual(cfg1.name, "bob")  # change is reflected | ||||
|  | ||||
|         # Change env; new instance should reflect new values | ||||
|         with patch.dict(os.environ, {"FLAG": "false", "NAME": ""}, clear=True): | ||||
|             cfg2 = Cfg() | ||||
|             self.assertFalse(cfg2.flag)  # str2bool("false") -> False | ||||
|             self.assertTrue("ab" in str(cfg2.out)) | ||||
|             self.assertIsInstance(cfg2.out, Path) | ||||
|             self.assertTrue(cfg2.out.is_absolute()) | ||||
|             self.assertEqual(cfg2.name, "anon")  # empty -> fallback to default | ||||
|  | ||||
|     def test_dataclass_path_field_with_default_value(self): | ||||
|         @dataclass | ||||
|         class C2: | ||||
|             out: Path = m.env_path_field("OUT", default="some/dir", resolve=False) | ||||
|  | ||||
|         with patch.dict(os.environ, {}, clear=True): | ||||
|             c = C2() | ||||
|             self.assertEqual(c.out, Path("some/dir")) | ||||
|  | ||||
|  | ||||
| # Run the unittest runner when this file is executed directly as a script. | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
| @ -1,122 +0,0 @@ | ||||
| # test_path_utils.py | ||||
| # Run: pytest -q | ||||
|  | ||||
| import os | ||||
| import unittest | ||||
| from pathlib import Path | ||||
| from tempfile import TemporaryDirectory | ||||
|  | ||||
| from cli.lib.common.path_helper import ( | ||||
|     copy, | ||||
|     ensure_dir_exists, | ||||
|     force_create_dir, | ||||
|     get_path, | ||||
|     is_path_exist, | ||||
|     remove_dir, | ||||
| ) | ||||
|  | ||||
|  | ||||
| class TestPathHelper(unittest.TestCase): | ||||
|     def setUp(self): | ||||
|         self.tmpdir = TemporaryDirectory() | ||||
|         self.tmp_path = Path(self.tmpdir.name) | ||||
|  | ||||
|     def tearDown(self): | ||||
|         self.tmpdir.cleanup() | ||||
|  | ||||
|     # -------- get_path -------- | ||||
|     def test_get_path_returns_path_for_str(self): | ||||
|         # Use relative path to avoid absolute-ness | ||||
|         rel_str = "sub/f.txt" | ||||
|         os.chdir(self.tmp_path) | ||||
|         p = get_path(rel_str, resolve=False) | ||||
|         self.assertIsInstance(p, Path) | ||||
|         self.assertFalse(p.is_absolute()) | ||||
|         self.assertEqual(str(p), rel_str) | ||||
|  | ||||
|     def test_get_path_resolves(self): | ||||
|         rel_str = "sub/f.txt" | ||||
|         p = get_path(str(self.tmp_path / rel_str), resolve=True) | ||||
|         self.assertTrue(p.is_absolute()) | ||||
|         self.assertTrue(str(p).endswith(rel_str)) | ||||
|  | ||||
|     def test_get_path_with_path_input(self): | ||||
|         p_in = self.tmp_path / "sub/f.txt" | ||||
|         p_out = get_path(p_in, resolve=False) | ||||
|         self.assertTrue(str(p_out) == str(p_in)) | ||||
|  | ||||
|     def test_get_path_with_none_raises(self): | ||||
|         with self.assertRaises(ValueError): | ||||
|             get_path(None)  # type: ignore[arg-type] | ||||
|  | ||||
|     def test_get_path_invalid_type_raises(self): | ||||
|         with self.assertRaises(TypeError): | ||||
|             get_path(123)  # type: ignore[arg-type] | ||||
|  | ||||
|     # -------- ensure_dir_exists / force_create_dir / remove_dir -------- | ||||
|     def test_ensure_dir_exists_creates_and_is_idempotent(self): | ||||
|         d = self.tmp_path / "made" | ||||
|         ensure_dir_exists(d) | ||||
|         self.assertTrue(d.exists() and d.is_dir()) | ||||
|         ensure_dir_exists(d) | ||||
|  | ||||
|     def test_force_create_dir_clears_existing(self): | ||||
|         d = self.tmp_path / "fresh" | ||||
|         (d / "inner").mkdir(parents=True) | ||||
|         (d / "inner" / "f.txt").write_text("x") | ||||
|         force_create_dir(d) | ||||
|         self.assertTrue(d.exists()) | ||||
|         self.assertEqual(list(d.iterdir()), []) | ||||
|  | ||||
|     def test_remove_dir_none_is_noop(self): | ||||
|         remove_dir(None)  # type: ignore[arg-type] | ||||
|  | ||||
|     def test_remove_dir_nonexistent_is_noop(self): | ||||
|         ghost = self.tmp_path / "ghost" | ||||
|         remove_dir(ghost) | ||||
|  | ||||
|     def test_remove_dir_accepts_str(self): | ||||
|         d = self.tmp_path / "to_rm" | ||||
|         d.mkdir() | ||||
|         remove_dir(str(d)) | ||||
|         self.assertFalse(d.exists()) | ||||
|  | ||||
|     # -------- copy -------- | ||||
|     def test_copy_file_to_file(self): | ||||
|         src = self.tmp_path / "src.txt" | ||||
|         dst = self.tmp_path / "out" / "dst.txt" | ||||
|         src.write_text("hello") | ||||
|         copy(src, dst) | ||||
|         self.assertEqual(dst.read_text(), "hello") | ||||
|  | ||||
|     def test_copy_dir_to_new_dir(self): | ||||
|         src = self.tmp_path / "srcdir" | ||||
|         (src / "a").mkdir(parents=True) | ||||
|         (src / "a" / "f.txt").write_text("content") | ||||
|         dst = self.tmp_path / "destdir" | ||||
|         copy(src, dst) | ||||
|         self.assertEqual((dst / "a" / "f.txt").read_text(), "content") | ||||
|  | ||||
|     def test_copy_dir_into_existing_dir_overwrite_true_merges(self): | ||||
|         src = self.tmp_path / "srcdir" | ||||
|         dst = self.tmp_path / "destdir" | ||||
|         (src / "x").mkdir(parents=True) | ||||
|         (src / "x" / "new.txt").write_text("new") | ||||
|         dst.mkdir() | ||||
|         (dst / "existing.txt").write_text("old") | ||||
|         copy(src, dst) | ||||
|         self.assertEqual((dst / "existing.txt").read_text(), "old") | ||||
|         self.assertEqual((dst / "x" / "new.txt").read_text(), "new") | ||||
|  | ||||
|     def test_is_str_path_exist(self): | ||||
|         p = self.tmp_path / "x.txt" | ||||
|         p.write_text("1") | ||||
|         self.assertTrue(is_path_exist(str(p))) | ||||
|         self.assertTrue(is_path_exist(p)) | ||||
|         self.assertFalse(is_path_exist(str(self.tmp_path / "missing"))) | ||||
|         self.assertFalse(is_path_exist(self.tmp_path / "missing")) | ||||
|         self.assertFalse(is_path_exist("")) | ||||
|  | ||||
|  | ||||
| # Run the unittest runner when this file is executed directly as a script. | ||||
| if __name__ == "__main__": | ||||
|     unittest.main() | ||||
| @ -1,185 +0,0 @@ | ||||
| # tests/test_run_test_plan.py | ||||
| import importlib | ||||
| from contextlib import nullcontext | ||||
| from types import SimpleNamespace | ||||
| from unittest.mock import MagicMock | ||||
|  | ||||
| import pytest | ||||
|  | ||||
|  | ||||
| # Dotted name of the module under test; the patch_module fixture imports it by name. | ||||
| MOD = "cli.lib.core.vllm.lib" | ||||
|  | ||||
| # We import inside tests so the MOD override above applies everywhere | ||||
| # Dotted name of run_test_plan within MOD. | ||||
| # NOTE(review): not referenced by the tests visible in this file - confirm it is still needed. | ||||
| run_test_plan_import_path = f"{MOD}.run_test_plan" | ||||
|  | ||||
|  | ||||
| def _get_cmd(c): | ||||
|     # Support both kwargs and positional args | ||||
|     return c.kwargs.get("cmd", c.args[0] if c.args else None) | ||||
|  | ||||
|  | ||||
| def _get_check(c): | ||||
|     if "check" in c.kwargs: | ||||
|         return c.kwargs["check"] | ||||
|     # If positional, assume second arg is 'check' when present; default False | ||||
|     return c.args[1] if len(c.args) > 1 else False | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def patch_module(monkeypatch): | ||||
|     """ | ||||
|     Patch helpers ('pip_install_packages', 'temp_environ', 'working_directory', | ||||
|     'run_command', 'logger') inside the target module and expose them. | ||||
|     """ | ||||
|     module = importlib.import_module(MOD) | ||||
|  | ||||
|     # Create fakes/mocks | ||||
|     pip_install_packages = MagicMock(name="pip_install_packages") | ||||
|     run_command = MagicMock(name="run_command", return_value=0) | ||||
|  | ||||
|     # temp_environ / working_directory: record calls but act as context managers | ||||
|     temp_calls: list[dict] = [] | ||||
|     workdir_calls: list[str] = [] | ||||
|  | ||||
|     def fake_working_directory(path: str): | ||||
|         workdir_calls.append(path) | ||||
|         return nullcontext() | ||||
|  | ||||
|     def fake_temp_env(map: dict[str, str]): | ||||
|         temp_calls.append(map) | ||||
|         return nullcontext() | ||||
|  | ||||
|     logger = SimpleNamespace( | ||||
|         info=MagicMock(name="logger.info"), | ||||
|         error=MagicMock(name="logger.error"), | ||||
|     ) | ||||
|  | ||||
|     # Apply patches (raise if attribute doesn't exist) | ||||
|     monkeypatch.setattr( | ||||
|         module, "pip_install_packages", pip_install_packages, raising=True | ||||
|     ) | ||||
|     monkeypatch.setattr(module, "run_command", run_command, raising=True) | ||||
|     monkeypatch.setattr( | ||||
|         module, "working_directory", fake_working_directory, raising=True | ||||
|     ) | ||||
|     monkeypatch.setattr(module, "temp_environ", fake_temp_env, raising=True) | ||||
|     monkeypatch.setattr(module, "logger", logger, raising=True) | ||||
|  | ||||
|     return SimpleNamespace( | ||||
|         module=module, | ||||
|         run_test_plan=module.run_test_plan,  # expose to avoid getattr("constant") (Ruff B009) | ||||
|         pip_install_packages=pip_install_packages, | ||||
|         run_command=run_command, | ||||
|         temp_calls=temp_calls, | ||||
|         workdir_calls=workdir_calls, | ||||
|         logger=logger, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_success_runs_all_steps_and_uses_env_and_workdir(monkeypatch, patch_module): | ||||
|     run_test_plan = patch_module.run_test_plan | ||||
|  | ||||
|     tests_map = { | ||||
|         "basic": { | ||||
|             "title": "Basic suite", | ||||
|             "package_install": [], | ||||
|             "working_directory": "tests", | ||||
|             "env_vars": {"GLOBAL_FLAG": "1"}, | ||||
|             "steps": [ | ||||
|                 "export A=x && pytest -q", | ||||
|                 "export B=y && pytest -q tests/unit", | ||||
|             ], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # One exit code per step (export + two pytest) | ||||
|     patch_module.run_command.side_effect = [0, 0, 0] | ||||
|  | ||||
|     run_test_plan("basic", "cpu", tests_map) | ||||
|  | ||||
|     calls = patch_module.run_command.call_args_list | ||||
|     cmds = [_get_cmd(c) for c in calls] | ||||
|     checks = [_get_check(c) for c in calls] | ||||
|  | ||||
|     assert cmds == [ | ||||
|         "export A=x && pytest -q", | ||||
|         "export B=y && pytest -q tests/unit", | ||||
|     ] | ||||
|     assert all(chk is False for chk in checks) | ||||
|  | ||||
|     assert patch_module.workdir_calls == ["tests"] | ||||
|     assert patch_module.temp_calls == [{"GLOBAL_FLAG": "1"}] | ||||
|  | ||||
|  | ||||
| def test_installs_packages_when_present(monkeypatch, patch_module): | ||||
|     run_test_plan = patch_module.module.run_test_plan | ||||
|  | ||||
|     tests_map = { | ||||
|         "with_pkgs": { | ||||
|             "title": "Needs deps", | ||||
|             "package_install": ["timm==1.0.0", "flash-attn"], | ||||
|             "steps": ["pytest -q"], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     patch_module.run_command.return_value = 0 | ||||
|  | ||||
|     run_test_plan("with_pkgs", "gpu", tests_map) | ||||
|  | ||||
|     patch_module.pip_install_packages.assert_called_once_with( | ||||
|         packages=["timm==1.0.0", "flash-attn"], | ||||
|         prefer_uv=True, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def test_raises_on_missing_plan(patch_module): | ||||
|     run_test_plan = patch_module.module.run_test_plan | ||||
|     with pytest.raises(RuntimeError) as ei: | ||||
|         run_test_plan("nope", "cpu", tests_map={}) | ||||
|  | ||||
|     assert "test nope not found" in str(ei.value) | ||||
|  | ||||
|  | ||||
| def test_aggregates_failures_and_raises(monkeypatch, patch_module): | ||||
|     run_test_plan = patch_module.module.run_test_plan | ||||
|  | ||||
|     tests_map = { | ||||
|         "mix": { | ||||
|             "title": "Some pass some fail", | ||||
|             "steps": [ | ||||
|                 "pytest test_a.py",  # 0 → pass | ||||
|                 "pytest test_b.py",  # 1 → fail | ||||
|                 "pytest test_c.py",  # 2 → fail | ||||
|             ], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     # Simulate pass, fail, fail | ||||
|     patch_module.run_command.side_effect = [0, 1, 2] | ||||
|  | ||||
|     with pytest.raises(RuntimeError) as ei: | ||||
|         run_test_plan("mix", "cpu", tests_map) | ||||
|  | ||||
|     msg = str(ei.value) | ||||
|     assert "2 pytest runs failed" in msg | ||||
|     # Ensure logger captured failed tests list | ||||
|     patch_module.logger.error.assert_called_once() | ||||
|     # And we attempted all three commands | ||||
|     assert patch_module.run_command.call_count == 3 | ||||
|  | ||||
|  | ||||
| def test_custom_working_directory_used(patch_module): | ||||
|     run_test_plan = patch_module.module.run_test_plan | ||||
|  | ||||
|     tests_map = { | ||||
|         "customwd": { | ||||
|             "title": "Custom wd", | ||||
|             "working_directory": "examples/ci", | ||||
|             "steps": ["pytest -q"], | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     patch_module.run_command.return_value = 0 | ||||
|     run_test_plan("customwd", "cpu", tests_map) | ||||
|  | ||||
|     assert patch_module.workdir_calls == ["examples/ci"] | ||||
| @ -1,143 +0,0 @@ | ||||
| import os | ||||
| import tempfile | ||||
| import unittest | ||||
| from pathlib import Path | ||||
|  | ||||
| from cli.lib.common.utils import temp_environ, working_directory  # <-- replace import | ||||
|  | ||||
|  | ||||
| class EnvIsolatedTestCase(unittest.TestCase): | ||||
|     """Base class that snapshots os.environ and CWD for isolation.""" | ||||
|  | ||||
|     def setUp(self): | ||||
|         import os | ||||
|         import tempfile | ||||
|  | ||||
|         self._env_backup = dict(os.environ) | ||||
|  | ||||
|         # Snapshot/repair CWD if it's gone | ||||
|         try: | ||||
|             self._cwd_backup = os.getcwd() | ||||
|         except FileNotFoundError: | ||||
|             # If CWD no longer exists, switch to a safe place and record that | ||||
|             self._cwd_backup = tempfile.gettempdir() | ||||
|             os.chdir(self._cwd_backup) | ||||
|  | ||||
|         # Create a temporary directory for the test to run in | ||||
|         self._temp_dir = tempfile.mkdtemp() | ||||
|         os.chdir(self._temp_dir) | ||||
|  | ||||
|     def tearDown(self): | ||||
|         import os | ||||
|         import shutil | ||||
|         import tempfile | ||||
|  | ||||
|         # Restore cwd first (before cleaning up temp dir) | ||||
|         try: | ||||
|             os.chdir(self._cwd_backup) | ||||
|         except OSError: | ||||
|             os.chdir(tempfile.gettempdir()) | ||||
|  | ||||
|         # Clean up temporary directory | ||||
|         try: | ||||
|             shutil.rmtree(self._temp_dir, ignore_errors=True) | ||||
|         except Exception: | ||||
|             pass  # Ignore cleanup errors | ||||
|  | ||||
|         # Restore env | ||||
|         to_del = set(os.environ.keys()) - set(self._env_backup.keys()) | ||||
|         for k in to_del: | ||||
|             os.environ.pop(k, None) | ||||
|         for k, v in self._env_backup.items(): | ||||
|             os.environ[k] = v | ||||
|  | ||||
|  | ||||
| class TestTempEnviron(EnvIsolatedTestCase): | ||||
|     def test_sets_and_restores_new_var(self): | ||||
|         var = "TEST_TMP_ENV_NEW" | ||||
|         self.assertNotIn(var, os.environ) | ||||
|  | ||||
|         with temp_environ({var: "123"}): | ||||
|             self.assertEqual(os.environ[var], "123") | ||||
|  | ||||
|         self.assertNotIn(var, os.environ)  # removed after exit | ||||
|  | ||||
|     def test_overwrites_and_restores_existing_var(self): | ||||
|         var = "TEST_TMP_ENV_OVERWRITE" | ||||
|         os.environ[var] = "orig" | ||||
|  | ||||
|         with temp_environ({var: "override"}): | ||||
|             self.assertEqual(os.environ[var], "override") | ||||
|  | ||||
|         self.assertEqual(os.environ[var], "orig")  # restored | ||||
|  | ||||
|     def test_multiple_vars_and_missing_cleanup(self): | ||||
|         v1, v2 = "TEST_ENV_V1", "TEST_ENV_V2" | ||||
|         os.environ.pop(v1, None) | ||||
|         os.environ[v2] = "keep" | ||||
|  | ||||
|         with temp_environ({v1: "a", v2: "b"}): | ||||
|             self.assertEqual(os.environ[v1], "a") | ||||
|             self.assertEqual(os.environ[v2], "b") | ||||
|  | ||||
|         self.assertNotIn(v1, os.environ)  # newly-added -> removed | ||||
|         self.assertEqual(os.environ[v2], "keep")  # pre-existing -> restored | ||||
|  | ||||
|     def test_restores_even_on_exception(self): | ||||
|         var = "TEST_TMP_ENV_EXCEPTION" | ||||
|         self.assertNotIn(var, os.environ) | ||||
|  | ||||
|         with self.assertRaises(RuntimeError): | ||||
|             with temp_environ({var: "x"}): | ||||
|                 self.assertEqual(os.environ[var], "x") | ||||
|                 raise RuntimeError("boom") | ||||
|  | ||||
|         self.assertNotIn(var, os.environ)  # removed after exception | ||||
|  | ||||
|  | ||||
| class TestWorkingDirectory(EnvIsolatedTestCase): | ||||
|     def test_changes_and_restores(self): | ||||
|         start = Path.cwd() | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             target = Path(td) / "wd" | ||||
|             target.mkdir() | ||||
|  | ||||
|             with working_directory(str(target)): | ||||
|                 self.assertEqual(Path.cwd().resolve(), target.resolve()) | ||||
|  | ||||
|         self.assertEqual(Path.cwd(), start) | ||||
|  | ||||
|     def test_noop_when_empty_path(self): | ||||
|         start = Path.cwd() | ||||
|         with working_directory(""): | ||||
|             self.assertEqual(Path.cwd(), start) | ||||
|         self.assertEqual(Path.cwd(), start) | ||||
|  | ||||
|     def test_restores_on_exception(self): | ||||
|         start = Path.cwd() | ||||
|  | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             target = Path(td) / "wd_exc" | ||||
|             target.mkdir() | ||||
|  | ||||
|             with self.assertRaises(ValueError): | ||||
|                 with working_directory(str(target)): | ||||
|                     # Normalize both sides to handle /var -> /private/var | ||||
|                     self.assertEqual(Path.cwd().resolve(), target.resolve()) | ||||
|                     raise ValueError("boom") | ||||
|  | ||||
|         self.assertEqual(Path.cwd().resolve(), start.resolve()) | ||||
|  | ||||
|     def test_raises_for_missing_dir(self): | ||||
|         start = Path.cwd() | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             missing = Path(td) / "does_not_exist" | ||||
|             with self.assertRaises(FileNotFoundError): | ||||
|                 # os.chdir should raise before yielding | ||||
|                 with working_directory(str(missing)): | ||||
|                     pass | ||||
|         self.assertEqual(Path.cwd(), start) | ||||
|  | ||||
|  | ||||
| # Run the tests with verbose per-test output when executed directly as a script. | ||||
| if __name__ == "__main__": | ||||
|     unittest.main(verbosity=2) | ||||
| @ -1,176 +0,0 @@ | ||||
| import os | ||||
| import tempfile | ||||
| import unittest | ||||
| from pathlib import Path | ||||
| from unittest.mock import MagicMock, patch | ||||
|  | ||||
| import cli.lib.core.vllm.vllm_build as vllm_build | ||||
|  | ||||
|  | ||||
| # Dotted module path used to build the patch() targets in the tests below. | ||||
| _VLLM_BUILD_MODULE = "cli.lib.core.vllm.vllm_build" | ||||
|  | ||||
|  | ||||
| class TestVllmBuildParameters(unittest.TestCase): | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=True) | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=True) | ||||
|     @patch( | ||||
|         "cli.lib.common.envs_helper.env_path_optional", | ||||
|         side_effect=lambda name, default=None, resolve=True: { | ||||
|             "DOCKERFILE_PATH": Path("/abs/vllm/Dockerfile"), | ||||
|             "TORCH_WHEELS_PATH": Path("/abs/dist"), | ||||
|             "OUTPUT_DIR": Path("/abs/shared"), | ||||
|         }.get(name, Path(default) if default is not None else None), | ||||
|     ) | ||||
|     @patch.dict( | ||||
|         os.environ, | ||||
|         { | ||||
|             "USE_TORCH_WHEEL": "1", | ||||
|             "USE_LOCAL_BASE_IMAGE": "1", | ||||
|             "USE_LOCAL_DOCKERFILE": "1", | ||||
|             "BASE_IMAGE": "my/image:tag", | ||||
|             "DOCKERFILE_PATH": "vllm/Dockerfile", | ||||
|             "TORCH_WHEELS_PATH": "dist", | ||||
|             "OUTPUT_DIR": "shared", | ||||
|         }, | ||||
|         clear=True, | ||||
|     ) | ||||
|     def test_params_success_normalizes_and_validates( | ||||
|         self, mock_env_path, mock_is_path, mock_local_img | ||||
|     ): | ||||
|         params = vllm_build.VllmBuildParameters() | ||||
|         self.assertEqual(params.torch_whls_path, Path("/abs/dist")) | ||||
|         self.assertEqual(params.dockerfile_path, Path("/abs/vllm/Dockerfile")) | ||||
|         self.assertEqual(params.output_dir, Path("/abs/shared")) | ||||
|         self.assertEqual(params.base_image, "my/image:tag") | ||||
|  | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False) | ||||
|     @patch.dict( | ||||
|         os.environ, {"USE_TORCH_WHEEL": "1", "TORCH_WHEELS_PATH": "dist"}, clear=True | ||||
|     ) | ||||
|     def test_params_missing_torch_whls_raises(self, _is_path): | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             os.chdir(td) | ||||
|             with self.assertRaises(ValueError) as cm: | ||||
|                 vllm_build.VllmBuildParameters( | ||||
|                     use_local_base_image=False, | ||||
|                     use_local_dockerfile=False, | ||||
|                 ) | ||||
|         err = cm.exception | ||||
|         self.assertIn("TORCH_WHEELS_PATH", str(err)) | ||||
|  | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=False) | ||||
|     @patch.dict( | ||||
|         os.environ, {"USE_LOCAL_BASE_IMAGE": "1", "BASE_IMAGE": "img:tag"}, clear=True | ||||
|     ) | ||||
|     def test_params_missing_local_base_image_raises(self, _local_img): | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             os.chdir(td) | ||||
|             with self.assertRaises(ValueError) as cm: | ||||
|                 vllm_build.VllmBuildParameters( | ||||
|                     use_torch_whl=False, | ||||
|                     use_local_dockerfile=False, | ||||
|                 ) | ||||
|         err = cm.exception | ||||
|         self.assertIn("BASE_IMAGE", str(err)) | ||||
|  | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False) | ||||
|     @patch.dict( | ||||
|         os.environ, | ||||
|         {"USE_LOCAL_DOCKERFILE": "1", "DOCKERFILE_PATH": "Dockerfile"}, | ||||
|         clear=True, | ||||
|     ) | ||||
|     def test_params_missing_dockerfile_raises(self, _is_path): | ||||
|         with tempfile.TemporaryDirectory() as td: | ||||
|             os.chdir(td) | ||||
|             with self.assertRaises(ValueError) as cm: | ||||
|                 vllm_build.VllmBuildParameters( | ||||
|                     use_torch_whl=False, | ||||
|                     use_local_base_image=False, | ||||
|                 ) | ||||
|         err = cm.exception | ||||
|         self.assertIn("DOCKERFILE_PATH", str(err)) | ||||
|  | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.is_path_exist", return_value=False) | ||||
|     @patch.dict( | ||||
|         os.environ, | ||||
|         {"OUTPUT_DIR": ""}, | ||||
|         clear=True, | ||||
|     ) | ||||
|     def test_params_missing_output_dir(self, _is_path): | ||||
|         with self.assertRaises(FileNotFoundError): | ||||
|             vllm_build.VllmBuildParameters() | ||||
|  | ||||
|  | ||||
| class TestBuildCmdAndRun(unittest.TestCase): | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.local_image_exists", return_value=True) | ||||
|     def test_generate_docker_build_cmd_includes_bits(self, _exists): | ||||
|         runner = vllm_build.VllmBuildRunner() | ||||
|         inputs = MagicMock() | ||||
|         inputs.output_dir = Path("/abs/out") | ||||
|         inputs.use_local_base_image = True | ||||
|         inputs.base_image = "img:tag" | ||||
|         inputs.torch_whls_path = Path("./vllm/tmp") | ||||
|         inputs.max_jobs = 64 | ||||
|         inputs.cuda_version = "12.8.1" | ||||
|         inputs.python_version = "3.12" | ||||
|         inputs.sccache_bucket = "my-bucket" | ||||
|         inputs.sccache_region = "us-west-2" | ||||
|         inputs.torch_cuda_arch_list = "8.0;9.0" | ||||
|         inputs.target_stage = "export-wheels" | ||||
|         inputs.tag_name = "vllm-wheels" | ||||
|  | ||||
|         cmd = runner._generate_docker_build_cmd(inputs) | ||||
|         squashed = " ".join(cmd.split()) | ||||
|  | ||||
|         self.assertIn("--output type=local,dest=/abs/out", squashed) | ||||
|         self.assertIn("-f docker/Dockerfile.nightly_torch", squashed) | ||||
|         self.assertIn("--pull=false", squashed) | ||||
|         self.assertIn("--build-arg TORCH_WHEELS_PATH=tmp", squashed) | ||||
|         self.assertIn("--build-arg BUILD_BASE_IMAGE=img:tag", squashed) | ||||
|         self.assertIn("--build-arg FINAL_BASE_IMAGE=img:tag", squashed) | ||||
|         self.assertIn("--build-arg max_jobs=64", squashed) | ||||
|         self.assertIn("--build-arg CUDA_VERSION=12.8.1", squashed) | ||||
|         self.assertIn("--build-arg PYTHON_VERSION=3.12", squashed) | ||||
|         self.assertIn("--build-arg USE_SCCACHE=1", squashed) | ||||
|         self.assertIn("--build-arg SCCACHE_BUCKET_NAME=my-bucket", squashed) | ||||
|         self.assertIn("--build-arg SCCACHE_REGION_NAME=us-west-2", squashed) | ||||
|         self.assertIn("--build-arg torch_cuda_arch_list='8.0;9.0'", squashed) | ||||
|         self.assertIn("--target export-wheels", squashed) | ||||
|         self.assertIn("-t vllm-wheels", squashed) | ||||
|  | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.run_command") | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.ensure_dir_exists") | ||||
|     @patch(f"{_VLLM_BUILD_MODULE}.clone_vllm") | ||||
|     @patch.object( | ||||
|         vllm_build.VllmBuildRunner, | ||||
|         "_generate_docker_build_cmd", | ||||
|         return_value="docker buildx ...", | ||||
|     ) | ||||
|     @patch.dict( | ||||
|         os.environ, | ||||
|         { | ||||
|             "USE_TORCH_WHEEL": "0", | ||||
|             "USE_LOCAL_BASE_IMAGE": "0", | ||||
|             "USE_LOCAL_DOCKERFILE": "0", | ||||
|             "OUTPUT_DIR": "shared", | ||||
|         }, | ||||
|         clear=True, | ||||
|     ) | ||||
|     def test_run_calls_clone_prepare_and_build( | ||||
|         self, mock_gen, mock_clone, mock_ensure, mock_run | ||||
|     ): | ||||
|         params = MagicMock() | ||||
|         params.output_dir = Path("shared") | ||||
|         params.use_local_dockerfile = False | ||||
|         params.use_torch_whl = False | ||||
|  | ||||
|         with patch(f"{_VLLM_BUILD_MODULE}.VllmBuildParameters", return_value=params): | ||||
|             runner = vllm_build.VllmBuildRunner() | ||||
|             runner.run() | ||||
|  | ||||
|         mock_clone.assert_called_once() | ||||
|         mock_ensure.assert_called_once_with(Path("shared")) | ||||
|         mock_gen.assert_called_once_with(params) | ||||
|         mock_run.assert_called_once() | ||||
|         _, kwargs = mock_run.call_args | ||||
|         assert kwargs.get("cwd") == "vllm" | ||||
| @ -16,7 +16,6 @@ DOCKER_RUN = set -eou pipefail; ${DOCKER_CMD} run --rm -i \ | ||||
| 	magma/build_magma.sh | ||||
|  | ||||
| .PHONY: all | ||||
| all: magma-cuda130 | ||||
| all: magma-cuda129 | ||||
| all: magma-cuda128 | ||||
| all: magma-cuda126 | ||||
| @ -26,12 +25,6 @@ clean: | ||||
| 	$(RM) -r magma-* | ||||
| 	$(RM) -r output | ||||
|  | ||||
| .PHONY: magma-cuda130 | ||||
| magma-cuda130: DESIRED_CUDA := 13.0 | ||||
| magma-cuda130: CUDA_ARCH_LIST := -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120 | ||||
| magma-cuda130: | ||||
| 	$(DOCKER_RUN) | ||||
|  | ||||
| .PHONY: magma-cuda129 | ||||
| magma-cuda129: DESIRED_CUDA := 12.9 | ||||
| magma-cuda129: CUDA_ARCH_LIST += -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120 | ||||
|  | ||||
| @ -28,7 +28,6 @@ pushd ${PACKAGE_DIR}/magma-${MAGMA_VERSION} | ||||
| patch < ${PACKAGE_FILES}/CMake.patch | ||||
| patch < ${PACKAGE_FILES}/cmakelists.patch | ||||
| patch -p0 < ${PACKAGE_FILES}/thread_queue.patch | ||||
| patch -p1 < ${PACKAGE_FILES}/cuda13.patch | ||||
| patch -p1 < ${PACKAGE_FILES}/getrf_shfl.patch | ||||
| patch -p1 < ${PACKAGE_FILES}/getrf_nbparam.patch | ||||
| # The build.sh script expects to be executed from the sources root folder | ||||
| @ -38,7 +37,6 @@ popd | ||||
| # Package recipe, license and tarball | ||||
| # Folder and package name are backward compatible for the build workflow | ||||
| cp ${PACKAGE_FILES}/build.sh ${PACKAGE_RECIPE}/build.sh | ||||
| cp ${PACKAGE_FILES}/cuda13.patch ${PACKAGE_RECIPE}/cuda13.patch | ||||
| cp ${PACKAGE_FILES}/thread_queue.patch ${PACKAGE_RECIPE}/thread_queue.patch | ||||
| cp ${PACKAGE_FILES}/cmakelists.patch ${PACKAGE_RECIPE}/cmakelists.patch | ||||
| cp ${PACKAGE_FILES}/getrf_shfl.patch ${PACKAGE_RECIPE}/getrf_shfl.patch | ||||
|  | ||||
| @ -1,26 +0,0 @@ | ||||
| diff --git a/interface_cuda/interface.cpp b/interface_cuda/interface.cpp | ||||
| index 73fed1b20..e77519bfe 100644 | ||||
| --- a/interface_cuda/interface.cpp | ||||
| +++ b/interface_cuda/interface.cpp | ||||
| @@ -438,14 +438,20 @@ magma_print_environment() | ||||
|          cudaDeviceProp prop; | ||||
|          err = cudaGetDeviceProperties( &prop, dev ); | ||||
|          check_error( err ); | ||||
| +        #ifdef MAGMA_HAVE_CUDA | ||||
| +#if CUDA_VERSION < 13000 | ||||
|          printf( "%% device %d: %s, %.1f MHz clock, %.1f MiB memory, capability %d.%d\n", | ||||
|                  dev, | ||||
|                  prop.name, | ||||
|                  prop.clockRate / 1000., | ||||
| +#else | ||||
| +        printf( "%% device %d: %s, ??? MHz clock, %.1f MiB memory, capability %d.%d\n", | ||||
| +                dev, | ||||
| +                prop.name, | ||||
| +#endif | ||||
|                  prop.totalGlobalMem / (1024.*1024.), | ||||
|                  prop.major, | ||||
|                  prop.minor ); | ||||
| -        #ifdef MAGMA_HAVE_CUDA | ||||
|          int arch = prop.major*100 + prop.minor*10; | ||||
|          if ( arch < MAGMA_CUDA_ARCH_MIN ) { | ||||
|              printf("\n" | ||||
| @ -66,9 +66,6 @@ case ${CUDA_VERSION} in | ||||
|             TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX" | ||||
|         fi | ||||
|         ;; | ||||
|     13.0) | ||||
|         TORCH_CUDA_ARCH_LIST="7.5;8.0;8.6;9.0;10.0;12.0+PTX" | ||||
|         ;; | ||||
|     12.6) | ||||
|         TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0" | ||||
|         ;; | ||||
| @ -113,15 +110,11 @@ DEPS_SONAME=( | ||||
| ) | ||||
|  | ||||
|  | ||||
| # CUDA_VERSION 12.*, 13.* | ||||
| if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
| # CUDA_VERSION 12.6, 12.8, 12.9 | ||||
| if [[ $CUDA_VERSION == 12* ]]; then | ||||
|     export USE_STATIC_CUDNN=0 | ||||
|     # Try parallelizing nvcc as well | ||||
|     TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
|     # Compress the fatbin with -compress-mode=size for CUDA 13 | ||||
|     if [[ $CUDA_VERSION == 13* ]]; then | ||||
|         export TORCH_NVCC_FLAGS="$TORCH_NVCC_FLAGS -compress-mode=size" | ||||
|     fi | ||||
|     export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2" | ||||
|     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then | ||||
|         echo "Bundling with cudnn and cublas." | ||||
|         DEPS_LIST+=( | ||||
| @ -141,7 +134,7 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
|             "/usr/local/cuda/lib64/libnvrtc-builtins.so" | ||||
|             "/usr/local/cuda/lib64/libcufile.so.0" | ||||
|             "/usr/local/cuda/lib64/libcufile_rdma.so.1" | ||||
|             "/usr/local/cuda/lib64/libnvshmem_host.so.3" | ||||
|             "/usr/local/cuda/lib64/libnvshem_host.so.3" | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12" | ||||
|             "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so" | ||||
|         ) | ||||
| @ -174,29 +167,22 @@ if [[ $CUDA_VERSION == 12* || $CUDA_VERSION == 13* ]]; then | ||||
|     else | ||||
|         echo "Using nvidia libs from pypi." | ||||
|         CUDA_RPATHS=( | ||||
|             '$ORIGIN/../../nvidia/cublas/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_cupti/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_nvrtc/lib' | ||||
|             '$ORIGIN/../../nvidia/cuda_runtime/lib' | ||||
|             '$ORIGIN/../../nvidia/cudnn/lib' | ||||
|             '$ORIGIN/../../nvidia/nvshmem/lib' | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/cufft/lib' | ||||
|             '$ORIGIN/../../nvidia/curand/lib' | ||||
|             '$ORIGIN/../../nvidia/cusolver/lib' | ||||
|             '$ORIGIN/../../nvidia/cusparse/lib' | ||||
|             '$ORIGIN/../../nvidia/cusparselt/lib' | ||||
|             '$ORIGIN/../../cusparselt/lib' | ||||
|             '$ORIGIN/../../nvidia/nccl/lib' | ||||
|             '$ORIGIN/../../nvidia/nvshmem/lib' | ||||
|             '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|             '$ORIGIN/../../nvidia/cufile/lib' | ||||
|         ) | ||||
|         if [[ $CUDA_VERSION == 13* ]]; then | ||||
|             CUDA_RPATHS+=('$ORIGIN/../../nvidia/cu13/lib') | ||||
|         else | ||||
|             CUDA_RPATHS+=( | ||||
|                 '$ORIGIN/../../nvidia/cublas/lib' | ||||
|                 '$ORIGIN/../../nvidia/cuda_cupti/lib' | ||||
|                 '$ORIGIN/../../nvidia/cuda_nvrtc/lib' | ||||
|                 '$ORIGIN/../../nvidia/cuda_runtime/lib' | ||||
|                 '$ORIGIN/../../nvidia/cufft/lib' | ||||
|                 '$ORIGIN/../../nvidia/curand/lib' | ||||
|                 '$ORIGIN/../../nvidia/cusolver/lib' | ||||
|                 '$ORIGIN/../../nvidia/cusparse/lib' | ||||
|                 '$ORIGIN/../../cusparselt/lib' | ||||
|                 '$ORIGIN/../../nvidia/nvtx/lib' | ||||
|                 '$ORIGIN/../../nvidia/cufile/lib' | ||||
|             ) | ||||
|         fi | ||||
|  | ||||
|         CUDA_RPATHS=$(IFS=: ; echo "${CUDA_RPATHS[*]}") | ||||
|         export C_SO_RPATH=$CUDA_RPATHS':$ORIGIN:$ORIGIN/lib' | ||||
|         export LIB_SO_RPATH=$CUDA_RPATHS':$ORIGIN' | ||||
|  | ||||
| @ -25,7 +25,6 @@ source /opt/intel/oneapi/mpi/latest/env/vars.sh | ||||
| export USE_STATIC_MKL=1 | ||||
| export USE_ONEMKL=1 | ||||
| export USE_XCCL=1 | ||||
| export USE_MPI=0 | ||||
|  | ||||
| WHEELHOUSE_DIR="wheelhousexpu" | ||||
| LIBTORCH_HOUSE_DIR="libtorch_housexpu" | ||||
|  | ||||
| @ -92,27 +92,6 @@ if [[ "$BUILD_ENVIRONMENT" == *aarch64* ]]; then | ||||
|   export ACL_ROOT_DIR=/ComputeLibrary | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *riscv64* ]]; then | ||||
|   if [[ -f /opt/riscv-cross-env/bin/activate ]]; then | ||||
|     # shellcheck disable=SC1091 | ||||
|     source /opt/riscv-cross-env/bin/activate | ||||
|   else | ||||
|     echo "Activation file not found" | ||||
|     exit 1 | ||||
|   fi | ||||
|  | ||||
|   export CMAKE_CROSSCOMPILING=TRUE | ||||
|   export CMAKE_SYSTEM_NAME=Linux | ||||
|   export CMAKE_SYSTEM_PROCESSOR=riscv64 | ||||
|  | ||||
|   export USE_CUDA=0 | ||||
|   export USE_MKLDNN=0 | ||||
|  | ||||
|   export SLEEF_TARGET_EXEC_USE_QEMU=ON | ||||
|   sudo chown -R jenkins /var/lib/jenkins/workspace /opt | ||||
|  | ||||
| fi | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *libtorch* ]]; then | ||||
|   POSSIBLE_JAVA_HOMES=() | ||||
|   POSSIBLE_JAVA_HOMES+=(/usr/local) | ||||
| @ -173,7 +152,6 @@ if [[ "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   source /opt/intel/oneapi/mpi/latest/env/vars.sh | ||||
|   # Enable XCCL build | ||||
|   export USE_XCCL=1 | ||||
|   export USE_MPI=0 | ||||
|   # XPU kineto feature dependencies are not fully ready, disable kineto build as temp WA | ||||
|   export USE_KINETO=0 | ||||
|   export TORCH_XPU_ARCH_LIST=pvc | ||||
| @ -195,16 +173,8 @@ fi | ||||
|  | ||||
| # We only build FlashAttention files for CUDA architectures 8.0+, and they require large amounts of | ||||
| # memory to build and can OOM, so the number of parallel build jobs is limited | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *cuda* ]] && echo "${TORCH_CUDA_ARCH_LIST}" | tr ' ' '\n' | sed 's/$/>= 8.0/' | bc | grep -q 1; then | ||||
|   J=2  # default to 2 jobs | ||||
|   case "$RUNNER" in | ||||
|     linux.12xlarge.memory|linux.24xlarge.memory) | ||||
|       J=24 | ||||
|       ;; | ||||
|   esac | ||||
|   echo "Building FlashAttention with job limit $J" | ||||
|   export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j ${J}" | ||||
|   export BUILD_CUSTOM_STEP="ninja -C build flash_attention -j 2" | ||||
| fi | ||||
|  | ||||
| if [[ "${BUILD_ENVIRONMENT}" == *clang* ]]; then | ||||
| @ -239,7 +209,7 @@ fi | ||||
|  | ||||
| # Do not change workspace permissions for ROCm and s390x CI jobs | ||||
| # as it can leave workspace with bad permissions for cancelled jobs | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && -d /var/lib/jenkins/workspace ]]; then | ||||
| if [[ "$BUILD_ENVIRONMENT" != *rocm* && "$BUILD_ENVIRONMENT" != *s390x* && -d /var/lib/jenkins/workspace ]]; then | ||||
|   # Workaround for dind-rootless userid mapping (https://github.com/pytorch/ci-infra/issues/96) | ||||
|   WORKSPACE_ORIGINAL_OWNER_ID=$(stat -c '%u' "/var/lib/jenkins/workspace") | ||||
|   cleanup_workspace() { | ||||
| @ -284,7 +254,8 @@ else | ||||
|     # XLA test build fails when WERROR=1 | ||||
|     # set only when building other architectures | ||||
|     # or building non-XLA tests. | ||||
|     if [[ "$BUILD_ENVIRONMENT" != *rocm*  && "$BUILD_ENVIRONMENT" != *xla* && "$BUILD_ENVIRONMENT" != *riscv64* ]]; then | ||||
|     if [[ "$BUILD_ENVIRONMENT" != *rocm*  && | ||||
|           "$BUILD_ENVIRONMENT" != *xla* ]]; then | ||||
|       # Install numpy-2.0.2 for builds which are backward compatible with 1.X | ||||
|       python -mpip install numpy==2.0.2 | ||||
|  | ||||
| @ -421,7 +392,7 @@ if [[ "$BUILD_ENVIRONMENT" != *libtorch* && "$BUILD_ENVIRONMENT" != *bazel* ]]; | ||||
|   # don't do this for libtorch as libtorch is C++ only and thus won't have python tests run on its build | ||||
|   python tools/stats/export_test_times.py | ||||
| fi | ||||
| # don't do this for bazel or s390x or riscv64 as they don't use sccache | ||||
| if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *riscv64* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then | ||||
| # don't do this for bazel or s390x as they don't use sccache | ||||
| if [[ "$BUILD_ENVIRONMENT" != *s390x* && "$BUILD_ENVIRONMENT" != *-bazel-* ]]; then | ||||
|   print_sccache_stats | ||||
| fi | ||||
|  | ||||
| @ -300,3 +300,24 @@ except RuntimeError as e: | ||||
|     exit 1 | ||||
|   fi | ||||
| fi | ||||
|  | ||||
| ############################################################################### | ||||
| # Check for C++ ABI compatibility with GCC 11 - GCC 13 | ||||
| ############################################################################### | ||||
| if [[ "$(uname)" == 'Linux' &&  "$PACKAGE_TYPE" == 'manywheel' ]]; then | ||||
|   pushd /tmp | ||||
|   # Per https://gcc.gnu.org/onlinedocs/gcc/C_002b_002b-Dialect-Options.html | ||||
|   # gcc-11 is ABI16, gcc-13 is ABI18, gcc-14 is ABI19 | ||||
|   # gcc 11 - CUDA 11.8, xpu, rocm | ||||
|   # gcc 13 - CUDA 12.6, 12.8 and cpu | ||||
|   # Please see issue for reference: https://github.com/pytorch/pytorch/issues/152426 | ||||
|   if [[ "$(uname -m)" == "s390x" ]]; then | ||||
|     cxx_abi="19" | ||||
|   elif [[ "$DESIRED_CUDA" != 'xpu' && "$DESIRED_CUDA" != 'rocm'* ]]; then | ||||
|     cxx_abi="18" | ||||
|   else | ||||
|     cxx_abi="16" | ||||
|   fi | ||||
|   python -c "import torch; exit(0 if torch._C._PYBIND11_BUILD_ABI == '_cxxabi10${cxx_abi}' else 1)" | ||||
|   popd | ||||
| fi | ||||
|  | ||||
| @ -149,19 +149,6 @@ function get_pinned_commit() { | ||||
|   cat .github/ci_commit_pins/"${1}".txt | ||||
| } | ||||
|  | ||||
| function detect_cuda_arch() { | ||||
|   if [[ "${BUILD_ENVIRONMENT}" == *cuda* ]]; then | ||||
|     if command -v nvidia-smi; then | ||||
|       TORCH_CUDA_ARCH_LIST=$(nvidia-smi --query-gpu=compute_cap --format=csv | tail -n 1) | ||||
|     elif [[ "${TEST_CONFIG}" == *nogpu* ]]; then | ||||
|       # There won't be nvidia-smi in nogpu tests, so just set TORCH_CUDA_ARCH_LIST to the default | ||||
|       # minimum supported value here | ||||
|       TORCH_CUDA_ARCH_LIST=8.0 | ||||
|     fi | ||||
|     export TORCH_CUDA_ARCH_LIST | ||||
|   fi | ||||
| } | ||||
|  | ||||
| function install_torchaudio() { | ||||
|   local commit | ||||
|   commit=$(get_pinned_commit audio) | ||||
|  | ||||
| @ -35,10 +35,11 @@ fi | ||||
|  | ||||
| print_cmake_info | ||||
| if [[ ${BUILD_ENVIRONMENT} == *"distributed"* ]]; then | ||||
|   USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel | ||||
|   # Needed for inductor benchmarks, as lots of HF networks make `torch.distributed` calls | ||||
|   USE_DISTRIBUTED=1 USE_OPENMP=1 WERROR=1 python setup.py bdist_wheel | ||||
| else | ||||
|   # NB: we always build with distributed; USE_DISTRIBUTED turns off all | ||||
|   # backends (specifically the gloo backend), so test that this case works too | ||||
|   # Explicitly set USE_DISTRIBUTED=0 to align with the default build config on mac. This also serves as the sole CI config that tests | ||||
|   # that building with USE_DISTRIBUTED=0 works at all. See https://github.com/pytorch/pytorch/issues/86448 | ||||
|   USE_DISTRIBUTED=0 USE_OPENMP=1 MACOSX_DEPLOYMENT_TARGET=11.0 WERROR=1 BUILD_TEST=OFF USE_PYTORCH_METAL=1 python setup.py bdist_wheel --plat-name macosx_11_0_arm64 | ||||
| fi | ||||
| if which sccache > /dev/null; then | ||||
|  | ||||
| @ -13,13 +13,9 @@ if [[ ! $(python -c "import torch; print(int(torch.backends.openmp.is_available( | ||||
| fi | ||||
| popd | ||||
|  | ||||
| python -mpip install -r requirements.txt | ||||
|  | ||||
| # enable debug asserts in serialization | ||||
| export TORCH_SERIALIZATION_DEBUG=1 | ||||
|  | ||||
| python -mpip install --no-input -r requirements.txt | ||||
|  | ||||
| setup_test_python() { | ||||
|   # The CircleCI worker hostname doesn't resolve to an address. | ||||
|   # This environment variable makes ProcessGroupGloo default to | ||||
| @ -178,15 +174,13 @@ checkout_install_torchbench() { | ||||
|     # to install and test other models | ||||
|     python install.py --continue_on_fail | ||||
|   fi | ||||
|   popd | ||||
|  | ||||
|   pip install -r .ci/docker/ci_commit_pins/huggingface-requirements.txt | ||||
|   # https://github.com/pytorch/pytorch/issues/160689 to remove torchao because | ||||
|   # its current version 0.12.0 doesn't work with transformers 4.54.0 | ||||
|   pip uninstall -y torchao | ||||
|   # soxr comes from https://github.com/huggingface/transformers/pull/39429 | ||||
|   pip install transformers==4.54.0 soxr==0.5.0 | ||||
|  | ||||
|   echo "Print all dependencies after TorchBench is installed" | ||||
|   python -mpip freeze | ||||
|   popd | ||||
| } | ||||
|  | ||||
| torchbench_setup_macos() { | ||||
| @ -306,47 +300,6 @@ test_torchbench_smoketest() { | ||||
|     fi | ||||
|  | ||||
|   done | ||||
|   echo "Pytorch benchmark on mps device completed" | ||||
| } | ||||
|  | ||||
| test_aoti_torchbench_smoketest() { | ||||
|   print_cmake_info | ||||
|  | ||||
|   echo "Launching AOTInductor torchbench setup" | ||||
|   pip_benchmark_deps | ||||
|   # shellcheck disable=SC2119,SC2120 | ||||
|   torchbench_setup_macos | ||||
|  | ||||
|   TEST_REPORTS_DIR=$(pwd)/test/test-reports | ||||
|   mkdir -p "$TEST_REPORTS_DIR" | ||||
|  | ||||
|   local device=mps | ||||
|   local dtypes=(undefined float16 bfloat16 notset) | ||||
|   local dtype=${dtypes[$1]} | ||||
|   local models=(hf_T5 llama BERT_pytorch dcgan hf_GPT2 yolov3 resnet152 sam sam_fast pytorch_unet stable_diffusion_text_encoder speech_transformer Super_SloMo doctr_det_predictor doctr_reco_predictor timm_resnet timm_vovnet vgg16) | ||||
|  | ||||
|   echo "Launching torchbench inference performance run for AOT Inductor and dtype ${dtype}" | ||||
|   local dtype_arg="--${dtype}" | ||||
|   if [ "$dtype" == notset ]; then | ||||
|       dtype_arg="--float32" | ||||
|   fi | ||||
|   touch "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv" | ||||
|   for model in "${models[@]}"; do | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --performance --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \ | ||||
|       --output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_performance.csv" || true | ||||
|     PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/torchbench.py \ | ||||
|       --accuracy --only "$model" --export-aot-inductor --inference --devices "$device" "$dtype_arg" \ | ||||
|       --output "$TEST_REPORTS_DIR/aot_inductor_torchbench_${dtype}_inference_${device}_accuracy.csv" || true | ||||
|   done | ||||
|  | ||||
|   echo "Launching HuggingFace inference performance run for AOT Inductor and dtype ${dtype}" | ||||
|   PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \ | ||||
|     --performance --export-aot-inductor --inference --devices "$device" "$dtype_arg" \ | ||||
|     --output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_performance.csv" || true | ||||
|   PYTHONPATH="$(pwd)"/torchbench python benchmarks/dynamo/huggingface.py \ | ||||
|     --accuracy --export-aot-inductor --inference --devices "$device" "$dtype_arg" \ | ||||
|     --output "$TEST_REPORTS_DIR/aot_inductor_huggingface_${dtype}_inference_${device}_accuracy.csv" || true | ||||
|  | ||||
|   echo "Pytorch benchmark on mps device completed" | ||||
| } | ||||
| @ -395,8 +348,6 @@ elif [[ $TEST_CONFIG == *"perf_timm"* ]]; then | ||||
|   test_timm_perf | ||||
| elif [[ $TEST_CONFIG == *"perf_smoketest"* ]]; then | ||||
|   test_torchbench_smoketest "${SHARD_NUMBER}" | ||||
| elif [[ $TEST_CONFIG == *"aot_inductor_perf_smoketest"* ]]; then | ||||
|   test_aoti_torchbench_smoketest "${SHARD_NUMBER}" | ||||
| elif [[ $TEST_CONFIG == *"mps"* ]]; then | ||||
|   test_python_mps | ||||
| elif [[ $NUM_TEST_SHARDS -gt 1 ]]; then | ||||
|  | ||||
| @ -45,7 +45,6 @@ if [[ "${SHARD_NUMBER:-2}" == "2" ]]; then | ||||
|     # DTensor tests | ||||
|     time python test/run_test.py --verbose -i distributed/tensor/test_random_ops | ||||
|     time python test/run_test.py --verbose -i distributed/tensor/test_dtensor_compile | ||||
|     time python test/run_test.py --verbose -i distributed/tensor/test_utils.py | ||||
|  | ||||
|     # DeviceMesh test | ||||
|     time python test/run_test.py --verbose -i distributed/test_device_mesh | ||||
|  | ||||
| @ -91,7 +91,6 @@ if [[ "$BUILD_ENVIRONMENT" == *clang9* || "$BUILD_ENVIRONMENT" == *xpu* ]]; then | ||||
|   export VALGRIND=OFF | ||||
| fi | ||||
|  | ||||
| detect_cuda_arch | ||||
|  | ||||
| if [[ "$BUILD_ENVIRONMENT" == *s390x* ]]; then | ||||
|   # There are additional warnings on s390x, maybe due to newer gcc. | ||||
| @ -496,14 +495,6 @@ test_inductor_cpp_wrapper_shard() { | ||||
|     -k 'take' \ | ||||
|     --shard "$1" "$NUM_TEST_SHARDS" \ | ||||
|     --verbose | ||||
|  | ||||
|   if [[ "${BUILD_ENVIRONMENT}" == *xpu* ]]; then | ||||
|     python test/run_test.py \ | ||||
|       --include inductor/test_mkldnn_pattern_matcher \ | ||||
|       -k 'xpu' \ | ||||
|       --shard "$1" "$NUM_TEST_SHARDS" \ | ||||
|       --verbose | ||||
|   fi | ||||
| } | ||||
|  | ||||
| # "Global" flags for inductor benchmarking controlled by TEST_CONFIG | ||||
| @ -1638,10 +1629,6 @@ elif [[ "${TEST_CONFIG}" == *xla* ]]; then | ||||
|   install_torchvision | ||||
|   build_xla | ||||
|   test_xla | ||||
| elif [[ "$TEST_CONFIG" == *vllm* ]]; then | ||||
|     echo "vLLM CI uses TORCH_CUDA_ARCH_LIST: $TORCH_CUDA_ARCH_LIST" | ||||
|     (cd .ci/lumen_cli && python -m pip install -e .) | ||||
|     python -m cli.run test external vllm --test-plan "$TEST_CONFIG" --shard-id "$SHARD_NUMBER" --num-shards "$NUM_TEST_SHARDS" | ||||
| elif [[ "${TEST_CONFIG}" == *executorch* ]]; then | ||||
|   test_executorch | ||||
| elif [[ "$TEST_CONFIG" == 'jit_legacy' ]]; then | ||||
|  | ||||
| @ -44,7 +44,7 @@ python -m pip install pytest-rerunfailures==10.3 pytest-cpp==2.3.0 tensorboard== | ||||
| python -m pip install z3-solver==4.15.1.0 | ||||
|  | ||||
| # Install tlparse for test\dynamo\test_structured_trace.py UTs. | ||||
| python -m pip install tlparse==0.4.0 | ||||
| python -m pip install tlparse==0.3.30 | ||||
|  | ||||
| # Install parameterized | ||||
| python -m pip install parameterized==0.8.1 | ||||
|  | ||||
| @ -37,7 +37,7 @@ IF "%CUDA_PATH_V126%"=="" ( | ||||
| ) | ||||
|  | ||||
| IF "%BUILD_VISION%" == "" ( | ||||
|     set TORCH_CUDA_ARCH_LIST=5.0;6.0;6.1;7.0;7.5;8.0;8.6;9.0 | ||||
|     set TORCH_CUDA_ARCH_LIST=6.1;7.0;7.5;8.0;8.6;9.0 | ||||
|     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all | ||||
| ) ELSE ( | ||||
|     set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_50,code=sm_50 -gencode=arch=compute_60,code=sm_60 -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 | ||||
|  | ||||
| @ -1,59 +0,0 @@ | ||||
| @echo off | ||||
|  | ||||
| set MODULE_NAME=pytorch | ||||
|  | ||||
| IF NOT EXIST "setup.py" IF NOT EXIST "%MODULE_NAME%" ( | ||||
|     call internal\clone.bat | ||||
|     cd %~dp0 | ||||
| ) ELSE ( | ||||
|     call internal\clean.bat | ||||
| ) | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| call internal\check_deps.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| REM Check for optional components | ||||
|  | ||||
| set USE_CUDA= | ||||
| set CMAKE_GENERATOR=Visual Studio 15 2017 Win64 | ||||
|  | ||||
| IF "%NVTOOLSEXT_PATH%"=="" ( | ||||
|     IF EXIST "C:\Program Files\NVIDIA Corporation\NvToolsExt\lib\x64\nvToolsExt64_1.lib"  ( | ||||
|         set NVTOOLSEXT_PATH=C:\Program Files\NVIDIA Corporation\NvToolsExt | ||||
|     ) ELSE ( | ||||
|         echo NVTX ^(Visual Studio Extension ^for CUDA^) ^not installed, failing | ||||
|         exit /b 1 | ||||
|     ) | ||||
| ) | ||||
|  | ||||
| IF "%CUDA_PATH_V130%"=="" ( | ||||
|     IF EXIST "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc.exe" ( | ||||
|         set "CUDA_PATH_V130=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0" | ||||
|     ) ELSE ( | ||||
|         echo CUDA 13.0 not found, failing | ||||
|         exit /b 1 | ||||
|     ) | ||||
| ) | ||||
|  | ||||
| IF "%BUILD_VISION%" == "" ( | ||||
|     set TORCH_CUDA_ARCH_LIST=7.5;8.0;8.6;9.0;10.0;12.0 | ||||
|     set TORCH_NVCC_FLAGS=-Xfatbin -compress-all | ||||
| ) ELSE ( | ||||
|     set NVCC_FLAGS=-D__CUDA_NO_HALF_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_80,code=compute_80 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_90,code=compute_90 -gencode=arch=compute_100,code=compute_100 -gencode=arch=compute_120,code=compute_120 | ||||
| ) | ||||
|  | ||||
| set "CUDA_PATH=%CUDA_PATH_V130%" | ||||
| set "PATH=%CUDA_PATH_V130%\bin;%PATH%" | ||||
|  | ||||
| :optcheck | ||||
|  | ||||
| call internal\check_opts.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| if exist "%NIGHTLIES_PYTORCH_ROOT%" cd %NIGHTLIES_PYTORCH_ROOT%\.. | ||||
| call  %~dp0\internal\copy.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
|  | ||||
| call  %~dp0\internal\setup.bat | ||||
| IF ERRORLEVEL 1 goto :eof | ||||
| @ -26,7 +26,6 @@ if exist "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v%CUDA_VERSION_STR% | ||||
| if %CUDA_VER% EQU 126 goto cuda126 | ||||
| if %CUDA_VER% EQU 128 goto cuda128 | ||||
| if %CUDA_VER% EQU 129 goto cuda129 | ||||
| if %CUDA_VER% EQU 130 goto cuda130 | ||||
|  | ||||
| echo CUDA %CUDA_VERSION_STR% is not supported | ||||
| exit /b 1 | ||||
| @ -114,33 +113,6 @@ xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" | ||||
|  | ||||
| goto cuda_common | ||||
|  | ||||
| :cuda130 | ||||
|  | ||||
| set CUDA_INSTALL_EXE=cuda_13.0.0_windows.exe | ||||
| if not exist "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" ( | ||||
|     curl -k -L "https://ossci-windows.s3.amazonaws.com/%CUDA_INSTALL_EXE%" --output "%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" & REM @lint-ignore | ||||
|     if errorlevel 1 exit /b 1 | ||||
|     set "CUDA_SETUP_FILE=%SRC_DIR%\temp_build\%CUDA_INSTALL_EXE%" | ||||
|     set "ARGS=" | ||||
| ) | ||||
|  | ||||
| set CUDNN_FOLDER=cudnn-windows-x86_64-9.12.0.46_cuda13-archive | ||||
| set CUDNN_LIB_FOLDER="lib" | ||||
| set "CUDNN_INSTALL_ZIP=%CUDNN_FOLDER%.zip" | ||||
| if not exist "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" ( | ||||
|     curl -k -L "http://s3.amazonaws.com/ossci-windows/%CUDNN_INSTALL_ZIP%" --output "%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" & REM @lint-ignore | ||||
|     if errorlevel 1 exit /b 1 | ||||
|     set "CUDNN_SETUP_FILE=%SRC_DIR%\temp_build\%CUDNN_INSTALL_ZIP%" | ||||
| ) | ||||
|  | ||||
| @REM cuDNN 8.3+ requires zlib to be installed on the path | ||||
| echo Installing ZLIB dlls | ||||
| curl -k -L "http://s3.amazonaws.com/ossci-windows/zlib123dllx64.zip" --output "%SRC_DIR%\temp_build\zlib123dllx64.zip" | ||||
| 7z x "%SRC_DIR%\temp_build\zlib123dllx64.zip" -o"%SRC_DIR%\temp_build\zlib" | ||||
| xcopy /Y "%SRC_DIR%\temp_build\zlib\dll_x64\*.dll" "C:\Windows\System32" | ||||
|  | ||||
| goto cuda_common | ||||
|  | ||||
| :cuda_common | ||||
| :: NOTE: We only install CUDA if we don't have it installed already. | ||||
| :: With GHA runners these should be pre-installed as part of our AMI process | ||||
|  | ||||
| @ -1,22 +1,12 @@ | ||||
| set ADDITIONAL_OPTIONS="" | ||||
| set PYTHON_EXEC="python" | ||||
|  | ||||
|  | ||||
| if "%DESIRED_PYTHON%" == "3.13t" ( | ||||
|     echo Python version is set to 3.13t | ||||
|     set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.13.0/python-3.13.0-amd64.exe" | ||||
|     set ADDITIONAL_OPTIONS="Include_freethreaded=1" | ||||
|     set PYTHON_EXEC="python3.13t" | ||||
| ) else if "%DESIRED_PYTHON%"=="3.14" ( | ||||
|     echo Python version is set to 3.14 or 3.14t | ||||
|     set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe" | ||||
| ) else if "%DESIRED_PYTHON%"=="3.14t" ( | ||||
|     echo Python version is set to 3.14 or 3.14t | ||||
|     set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/3.14.0/python-3.14.0rc1-amd64.exe" | ||||
|     set ADDITIONAL_OPTIONS="Include_freethreaded=1" | ||||
|     set PYTHON_EXEC="python3.14t" | ||||
| ) else ( | ||||
|     echo Python version is set to %DESIRED_PYTHON% | ||||
|     echo DESIRED_PYTHON not defined, Python version is set to %DESIRED_PYTHON% | ||||
|     set "PYTHON_INSTALLER_URL=https://www.python.org/ftp/python/%DESIRED_PYTHON%.0/python-%DESIRED_PYTHON%.0-amd64.exe" %= @lint-ignore =% | ||||
| ) | ||||
|  | ||||
|  | ||||
| @ -13,9 +13,9 @@ if not exist "%SRC_DIR%\temp_build" mkdir "%SRC_DIR%\temp_build" | ||||
| :xpu_bundle_install_start | ||||
|  | ||||
| set XPU_BUNDLE_PARENT_DIR=C:\Program Files (x86)\Intel\oneAPI | ||||
| set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe | ||||
| set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/9d6d6c17-ca2d-4735-9331-99447e4a1280/intel-deep-learning-essentials-2025.0.1.28_offline.exe | ||||
| set XPU_BUNDLE_PRODUCT_NAME=intel.oneapi.win.deep-learning-essentials.product | ||||
| set XPU_BUNDLE_VERSION=2025.1.3+5 | ||||
| set XPU_BUNDLE_VERSION=2025.0.1+20 | ||||
| set XPU_BUNDLE_INSTALLED=0 | ||||
| set XPU_BUNDLE_UNINSTALL=0 | ||||
| set XPU_EXTRA_URL=NULL | ||||
| @ -24,9 +24,9 @@ set XPU_EXTRA_VERSION=2025.0.1+1226 | ||||
| set XPU_EXTRA_INSTALLED=0 | ||||
| set XPU_EXTRA_UNINSTALL=0 | ||||
|  | ||||
| if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.2] ( | ||||
|     set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/24751ead-ddc5-4479-b9e6-f9fe2ff8b9f2/intel-deep-learning-essentials-2025.2.1.25_offline.exe | ||||
|     set XPU_BUNDLE_VERSION=2025.2.1+20 | ||||
| if not [%XPU_VERSION%]==[] if [%XPU_VERSION%]==[2025.1] ( | ||||
|     set XPU_BUNDLE_URL=https://registrationcenter-download.intel.com/akdlm/IRC_NAS/75d4eb97-914a-4a95-852c-7b9733d80f74/intel-deep-learning-essentials-2025.1.3.8_offline.exe | ||||
|     set XPU_BUNDLE_VERSION=2025.1.3+5 | ||||
| ) | ||||
|  | ||||
| :: Check if XPU bundle is target version or already installed | ||||
| @ -90,3 +90,14 @@ if errorlevel 1 exit /b 1 | ||||
| del xpu_extra.exe | ||||
|  | ||||
| :xpu_install_end | ||||
|  | ||||
| if not "%XPU_ENABLE_KINETO%"=="1" goto install_end | ||||
| :: Install Level Zero SDK | ||||
| set XPU_EXTRA_LZ_URL=https://github.com/oneapi-src/level-zero/releases/download/v1.14.0/level-zero-sdk_1.14.0.zip | ||||
| curl -k -L %XPU_EXTRA_LZ_URL% --output "%SRC_DIR%\temp_build\level_zero_sdk.zip" | ||||
| echo "Installing level zero SDK..." | ||||
| 7z x "%SRC_DIR%\temp_build\level_zero_sdk.zip" -o"%SRC_DIR%\temp_build\level_zero" | ||||
| set "INCLUDE=%SRC_DIR%\temp_build\level_zero\include;%INCLUDE%" | ||||
| del "%SRC_DIR%\temp_build\level_zero_sdk.zip" | ||||
|  | ||||
| :install_end | ||||
|  | ||||
| @ -7,8 +7,6 @@ call "internal\install_python.bat" | ||||
|  | ||||
| %PYTHON_EXEC% --version | ||||
| set "PATH=%CD%\Python\Lib\site-packages\cmake\data\bin;%CD%\Python\Scripts;%CD%\Python;%PATH%" | ||||
| if "%DESIRED_PYTHON%" == "3.14t" %PYTHON_EXEC% -m pip install numpy==2.3.2 cmake | ||||
| if "%DESIRED_PYTHON%" == "3.14" %PYTHON_EXEC% -m pip install numpy==2.3.2 cmake | ||||
| if "%DESIRED_PYTHON%" == "3.13t" %PYTHON_EXEC% -m pip install numpy==2.2.1 cmake | ||||
| if "%DESIRED_PYTHON%" == "3.13" %PYTHON_EXEC% -m pip install numpy==2.1.2 cmake | ||||
| if "%DESIRED_PYTHON%" == "3.12" %PYTHON_EXEC% -m pip install numpy==2.0.2 cmake | ||||
|  | ||||
| @ -128,35 +128,16 @@ export MACOSX_DEPLOYMENT_TARGET=10.15 | ||||
| export CMAKE_PREFIX_PATH=${CONDA_PREFIX:-"$(dirname $(which conda))/../"} | ||||
|  | ||||
| SETUPTOOLS_PINNED_VERSION="==70.1.0" | ||||
| PYYAML_PINNED_VERSION="==5.3" | ||||
| PYYAML_PINNED_VERSION="=5.3" | ||||
| EXTRA_CONDA_INSTALL_FLAGS="" | ||||
| CONDA_ENV_CREATE_FLAGS="" | ||||
| RENAME_WHEEL=true | ||||
| case $desired_python in | ||||
|     3.14t) | ||||
|         echo "Using 3.14 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|         desired_python="3.14.0rc1" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.14) | ||||
|         echo "Using 3.14t deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|         desired_python="3.14.0rc1" | ||||
|         RENAME_WHEEL=false | ||||
|         ;; | ||||
|     3.13t) | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         NUMPY_PINNED_VERSION="=2.1.0" | ||||
|         CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|         EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|         desired_python="3.13" | ||||
| @ -166,35 +147,35 @@ case $desired_python in | ||||
|         echo "Using 3.13 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.1.0" | ||||
|         NUMPY_PINNED_VERSION="=2.1.0" | ||||
|         ;; | ||||
|     3.12) | ||||
|         echo "Using 3.12 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=6.0.1" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         NUMPY_PINNED_VERSION="=2.0.2" | ||||
|         ;; | ||||
|     3.11) | ||||
|         echo "Using 3.11 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         NUMPY_PINNED_VERSION="=2.0.2" | ||||
|         ;; | ||||
|     3.10) | ||||
|         echo "Using 3.10 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         NUMPY_PINNED_VERSION="=2.0.2" | ||||
|         ;; | ||||
|     3.9) | ||||
|         echo "Using 3.9 deps" | ||||
|         SETUPTOOLS_PINNED_VERSION=">=70.1.0" | ||||
|         PYYAML_PINNED_VERSION=">=5.3" | ||||
|         NUMPY_PINNED_VERSION="==2.0.2" | ||||
|         NUMPY_PINNED_VERSION="=2.0.2" | ||||
|         ;; | ||||
|     *) | ||||
|         echo "Using default deps" | ||||
|         NUMPY_PINNED_VERSION="==1.11.3" | ||||
|         NUMPY_PINNED_VERSION="=1.11.3" | ||||
|         ;; | ||||
| esac | ||||
|  | ||||
| @ -203,18 +184,12 @@ tmp_env_name="wheel_py$python_nodot" | ||||
| conda create ${EXTRA_CONDA_INSTALL_FLAGS} -yn "$tmp_env_name" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} | ||||
| source activate "$tmp_env_name" | ||||
|  | ||||
| PINNED_PACKAGES=( | ||||
|     "setuptools${SETUPTOOLS_PINNED_VERSION}" | ||||
|     "pyyaml${PYYAML_PINNED_VERSION}" | ||||
|     "numpy${NUMPY_PINNED_VERSION}" | ||||
| ) | ||||
| retry pip install "${PINNED_PACKAGES[@]}" -r "${pytorch_rootdir}/requirements-build.txt" | ||||
| pip install requests ninja typing-extensions | ||||
| retry pip install -r "${pytorch_rootdir}/requirements-build.txt" | ||||
| pip install "numpy=${NUMPY_PINNED_VERSION}"  "pyyaml${PYYAML_PINNED_VERSION}" requests ninja "setuptools${SETUPTOOLS_PINNED_VERSION}" typing-extensions | ||||
| retry pip install -r "${pytorch_rootdir}/requirements.txt" || true | ||||
| retry brew install libomp | ||||
|  | ||||
| # For USE_DISTRIBUTED=1 on macOS, this enables gloo, which needs libuv, which | ||||
| # is built as part of tensorpipe submodule | ||||
| # For USE_DISTRIBUTED=1 on macOS, we need libuv, which is built as part of the tensorpipe submodule | ||||
| export USE_DISTRIBUTED=1 | ||||
|  | ||||
| export USE_MKLDNN=OFF | ||||
|  | ||||
| @ -75,8 +75,8 @@ TRITON_VERSION=$(cat $PYTORCH_ROOT/.ci/docker/triton_version.txt) | ||||
| # Here PYTORCH_EXTRA_INSTALL_REQUIREMENTS is already set for all the wheel builds, hence we append TRITON_CONSTRAINT | ||||
| TRITON_CONSTRAINT="platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|  | ||||
| # CUDA 12.9/13.0 builds have triton for Linux and Linux aarch64 binaries. | ||||
| if [[ "$DESIRED_CUDA" == "cu129" ]] || [[ "$DESIRED_CUDA" == "cu130" ]]; then | ||||
| # CUDA 12.9 builds have triton for Linux and Linux aarch64 binaries. | ||||
| if [[ "$DESIRED_CUDA" == "cu129" ]]; then | ||||
|   TRITON_CONSTRAINT="platform_system == 'Linux'" | ||||
| fi | ||||
|  | ||||
|  | ||||
| @ -51,12 +51,16 @@ s3_upload() { | ||||
|     s3_upload_dir="${s3_root_dir}/${UPLOAD_SUBFOLDER}/" | ||||
|   fi | ||||
|   ( | ||||
|     cache_control_flag="" | ||||
|     if [[ "${UPLOAD_CHANNEL}" = "test" ]]; then | ||||
|       cache_control_flag="--cache-control='no-cache,no-store,must-revalidate'" | ||||
|     fi | ||||
|     for pkg in ${PKG_DIR}/*.${extension}; do | ||||
|       ( | ||||
|         set -x | ||||
|         shm_id=$(sha256sum "${pkg}" | awk '{print $1}') | ||||
|         ${AWS_S3_CP} --no-progress --acl public-read "${pkg}" "${s3_upload_dir}" \ | ||||
|           --metadata "checksum-sha256=${shm_id}" | ||||
|           --metadata "checksum-sha256=${shm_id}" ${cache_control_flag} | ||||
|       ) | ||||
|     done | ||||
|   ) | ||||
|  | ||||
| @ -15,7 +15,8 @@ fi | ||||
| if [[ "$DESIRED_CUDA" == 'xpu' ]]; then | ||||
|     export VC_YEAR=2022 | ||||
|     export USE_SCCACHE=0 | ||||
|     export XPU_VERSION=2025.2 | ||||
|     export XPU_VERSION=2025.1 | ||||
|     export XPU_ENABLE_KINETO=1 | ||||
| fi | ||||
|  | ||||
| echo "Free space on filesystem before build:" | ||||
|  | ||||
| @ -8,7 +8,7 @@ export VC_YEAR=2022 | ||||
|  | ||||
| if [[ "$DESIRED_CUDA" == 'xpu' ]]; then | ||||
|     export VC_YEAR=2022 | ||||
|     export XPU_VERSION=2025.2 | ||||
|     export XPU_VERSION=2025.1 | ||||
| fi | ||||
|  | ||||
| pushd "$PYTORCH_ROOT/.ci/pytorch/" | ||||
|  | ||||
							
								
								
									
										1
									
								
								.flake8
									
									
									
									
									
								
							
							
						
						
									
										1
									
								
								.flake8
									
									
									
									
									
								
							| @ -48,7 +48,6 @@ per-file-ignores = | ||||
|     torch/__init__.py: F401,TOR901 | ||||
|     torch/_custom_op/impl.py: TOR901 | ||||
|     torch/_export/serde/upgrade.py: TOR901 | ||||
|     torch/_functorch/predispatch.py: TOR901 | ||||
|     torch/_functorch/vmap.py: TOR901 | ||||
|     torch/_inductor/test_operators.py: TOR901 | ||||
|     torch/_library/abstract_impl.py: TOR901 | ||||
|  | ||||
| @ -1,81 +0,0 @@ | ||||
| # .github/workflows/build-external.yml | ||||
| name: Build External packages | ||||
|  | ||||
| description: build external packages for PyTorch | ||||
|  | ||||
| inputs: | ||||
|   cuda-arch-list: | ||||
|     description: TORCH_CUDA_ARCH_LIST (e.g., "8.0;8.9;9.0") | ||||
|     type: string | ||||
|     required: true | ||||
|     default: "" | ||||
|   docker-image: | ||||
|     description: Base image to use | ||||
|     type: string | ||||
|     required: true | ||||
|   build-targets: | ||||
|     description: Build targets | ||||
|     type: string | ||||
|     required: true | ||||
|   torch-wheel-dir: | ||||
|     description: Directory to built torch wheel | ||||
|     type: string | ||||
|     required: false | ||||
|     default: dist | ||||
|   output-dir: | ||||
|     description: Directory to store build artifact | ||||
|     default: external | ||||
|     type: string | ||||
|     required: false | ||||
|  | ||||
| outputs: | ||||
|   build_time: | ||||
|     description: "Total build time in seconds" | ||||
|     value: ${{ steps.build-external.outputs.build_time }} | ||||
|   output_dir: | ||||
|     description: "Directory where build artifact is stored" | ||||
|     value: ${{ steps.build-external.outputs.output_dir }} | ||||
|  | ||||
| runs: | ||||
|   using: composite | ||||
|   steps: | ||||
|     - name: Build external packages in sequence | ||||
|       id: build-external | ||||
|       env: | ||||
|         SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2 | ||||
|         SCCACHE_REGION: us-east-1 | ||||
|         TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }} | ||||
|         BASE_IMAGE: ${{ inputs.docker-image }} | ||||
|         BUILD_TARGETS: ${{ inputs.build-targets }} | ||||
|         PARENT_OUTPUT_DIR: ${{ inputs.output-dir}} | ||||
|  | ||||
|       shell: bash | ||||
|       run: | | ||||
|         set -euo pipefail | ||||
|         python3 --version | ||||
|         docker images | ||||
|         START_TIME=$(date +%s) | ||||
|         ( | ||||
|           cd .ci/lumen_cli | ||||
|           python3 -m pip install -e . | ||||
|         ) | ||||
|         MAX_JOBS="$(nproc --ignore=6)" | ||||
|         export MAX_JOBS | ||||
|  | ||||
|         # Split the comma-separated list and build each target | ||||
|         IFS=',' read -ra TARGETS <<< "$BUILD_TARGETS" | ||||
|         for target in "${TARGETS[@]}"; do | ||||
|           OUTPUT_DIR="$PARENT_OUTPUT_DIR/$target" | ||||
|           export OUTPUT_DIR | ||||
|           echo "Building external package: $target in directory $OUTPUT_DIR" | ||||
|           python3 -m cli.run build external "$target" | ||||
|  | ||||
|         done | ||||
|  | ||||
|         END_TIME=$(date +%s) | ||||
|         { | ||||
|           echo "build_time=$((END_TIME - START_TIME))" | ||||
|           if [ -d "$PARENT_OUTPUT_DIR" ]; then | ||||
|             echo "output_dir=$PARENT_OUTPUT_DIR" | ||||
|           fi | ||||
|         } >> "$GITHUB_OUTPUT" | ||||
							
								
								
									
										15
									
								
								.github/actions/checkout-pytorch/action.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/actions/checkout-pytorch/action.yml
									
									
									
									
										vendored
									
									
								
							| @ -57,21 +57,6 @@ runs: | ||||
|         submodules: ${{ inputs.submodules }} | ||||
|         show-progress: false | ||||
|  | ||||
|     - name: Clean submodules post checkout | ||||
|       id: clean-submodules | ||||
|       if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }} | ||||
|       shell: bash | ||||
|       env: | ||||
|         NO_SUDO: ${{ inputs.no-sudo }} | ||||
|       run: | | ||||
|         cd "${GITHUB_WORKSPACE}" | ||||
|         # Clean stale submodule dirs | ||||
|         if [ -z "${NO_SUDO}" ]; then | ||||
|           sudo git submodule foreach --recursive git clean -ffdx | ||||
|         else | ||||
|           git submodule foreach --recursive git clean -ffdx | ||||
|         fi | ||||
|  | ||||
|     - name: Clean workspace (try again) | ||||
|       if: ${{ steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' && | ||||
|         (steps.first-clean.outcome != 'success' || steps.first-checkout-attempt.outcome != 'success') }} | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/audio.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 0757bbb660855272f7dd8d31cc84e7c631522805 | ||||
| bdb88e1d66f272cad72156c90ac8428ca61a601c | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/vllm.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 862f2ef893d9751db0a92bd2d4ae0e3d9677872f | ||||
| 458e74eb907f96069e6d8a4f3c9f457001fef2ea | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ci_commit_pins/xla.txt
									
									
									
									
										vendored
									
									
								
							| @ -1 +1 @@ | ||||
| 763e5b78d4fcd74a9e812256656c075f99d9a781 | ||||
| 095faec1e7b6cc47220181e74ae9cde2605f9b00 | ||||
|  | ||||
							
								
								
									
										439
									
								
								.github/ci_configs/vllm/Dockerfile.tmp_vllm
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										439
									
								
								.github/ci_configs/vllm/Dockerfile.tmp_vllm
									
									
									
									
										vendored
									
									
								
							| @ -1,439 +0,0 @@ | ||||
| # TODO(elainwy): remove this file after the torch nightly dockerfile is in sync in vllm repo | ||||
| # The vLLM Dockerfile is used to construct vLLM image against torch nightly and torch main that can be directly used for testing | ||||
|  | ||||
| ARG CUDA_VERSION=12.8.1 | ||||
| ARG PYTHON_VERSION=3.12 | ||||
|  | ||||
| # BUILD_BASE_IMAGE: used to set up Python and build the xformers and vllm wheels. It can be replaced with a different base image from the local machine, | ||||
| # by default, it uses the torch-nightly-base stage from this docker image | ||||
| ARG BUILD_BASE_IMAGE=torch-nightly-base | ||||
|  | ||||
| # FINAL_BASE_IMAGE: used to set up the vllm-installed environment and build flashinfer, | ||||
| # by default, it uses devel-ubuntu22.04 official image. | ||||
| ARG FINAL_BASE_IMAGE=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 | ||||
|  | ||||
|  | ||||
| #################### TORCH NIGHTLY  BASE IMAGE #################### | ||||
| # A base image for building vLLM with devel ubuntu 22.04; this is mainly used to build vllm in the vllm Buildkite CI | ||||
| From nvidia/cuda:${CUDA_VERSION}-devel-ubuntu22.04 as torch-nightly-base | ||||
| ARG CUDA_VERSION=12.8.1 | ||||
| ARG PYTHON_VERSION=3.12 | ||||
| ARG TARGETPLATFORM | ||||
| ENV DEBIAN_FRONTEND=noninteractive | ||||
|  | ||||
| RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ | ||||
|     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment | ||||
|  | ||||
| # Install Python and other dependencies if the requested Python version is not already present | ||||
| RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \ | ||||
|       echo "Installing Python ${PYTHON_VERSION}..." && \ | ||||
|       echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \ | ||||
|       echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y ccache software-properties-common git curl sudo && \ | ||||
|       for i in 1 2 3; do \ | ||||
|         add-apt-repository -y ppa:deadsnakes/ppa && break || \ | ||||
|         { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ | ||||
|       done && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \ | ||||
|       update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \ | ||||
|       update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \ | ||||
|       ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \ | ||||
|       curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \ | ||||
|    else \ | ||||
|       echo "Python ${PYTHON_VERSION} already present, skipping setup."; \ | ||||
|    fi \ | ||||
|    && python3 --version && python3 -m pip --version | ||||
|  | ||||
| # Upgrade to GCC 10 to avoid https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92519 | ||||
| # as it was causing spam when compiling the CUTLASS kernels | ||||
| # Ensure gcc >= 10 to avoid CUTLASS issues (bug 92519) | ||||
| RUN current_gcc_version=$(gcc -dumpversion | cut -f1 -d.) && \ | ||||
|     if [ "$current_gcc_version" -lt 10 ]; then \ | ||||
|       echo "GCC version is $current_gcc_version, installing gcc-10..."; \ | ||||
|       apt-get update && \ | ||||
|       apt-get install -y gcc-10 g++-10 && \ | ||||
|       update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 100 && \ | ||||
|       update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 100; \ | ||||
|     else \ | ||||
|       echo "GCC version is $current_gcc_version, no need to install gcc-10."; \ | ||||
|     fi && \ | ||||
|     gcc --version && g++ --version | ||||
|  | ||||
| # install uv for faster pip installs | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     python3 -m pip install uv==0.8.4 | ||||
|  | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| #################### TORCH NIGHTLY  BASE IMAGE #################### | ||||
|  | ||||
|  | ||||
| #################### BASE BUILD IMAGE #################### | ||||
| # A base image for building vLLM with torch nightly or torch wheels | ||||
| # prepare basic build environment | ||||
| FROM ${BUILD_BASE_IMAGE} AS base | ||||
| USER root | ||||
|  | ||||
| # Workaround for https://github.com/openai/triton/issues/2507 and | ||||
| # https://github.com/pytorch/pytorch/issues/107960 -- hopefully | ||||
| # this won't be needed for future versions of this docker image | ||||
| # or future versions of triton. | ||||
| RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ | ||||
|  | ||||
| # Install uv for faster pip installs if it is not already installed | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if ! python3 -m uv --version >/dev/null 2>&1; then \ | ||||
|         python3 -m pip install uv==0.8.4; \ | ||||
|     fi | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| WORKDIR /workspace | ||||
|  | ||||
| # install build and runtime dependencies | ||||
| COPY requirements/common.txt requirements/common.txt | ||||
| COPY use_existing_torch.py use_existing_torch.py | ||||
| COPY pyproject.toml pyproject.toml | ||||
|  | ||||
| # install build and runtime dependencies without stable torch version | ||||
| RUN python3 use_existing_torch.py | ||||
|  | ||||
| # default mount file as placeholder, this just avoids the mount error | ||||
| # change to a different vllm folder if this does not exist anymore | ||||
| ARG TORCH_WHEELS_PATH="./requirements" | ||||
| ARG PINNED_TORCH_VERSION | ||||
|  | ||||
| # Install torch, torchaudio and torchvision based on the input | ||||
| # if TORCH_WHEELS_PATH is default "./requirements", it will pull the nightly versions using pip | ||||
| # otherwise, it will use the whls from TORCH_WHEELS_PATH from the host machine | ||||
| RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \ | ||||
|     --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \ | ||||
|         echo "[INFO] Installing torch wheels to build vllm"; \ | ||||
|         torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \ | ||||
|         vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]"; \ | ||||
|         uv pip install --system "${vision_whl}"; \ | ||||
|         uv pip install --system "${audio_whl}"; \ | ||||
|     elif [ -n "$PINNED_TORCH_VERSION" ]; then \ | ||||
|         echo "[INFO] Installing pinned torch nightly version to build vllm: $PINNED_TORCH_VERSION"; \ | ||||
|         uv pip install --system "$PINNED_TORCH_VERSION" --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     else \ | ||||
|         echo "[INFO] Installing torch nightly with latest one to build vllm"; \ | ||||
|         uv pip install --system torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     fi | ||||
|  | ||||
| # Install numba 0.61.2 for cuda environment | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system numba==0.61.2 | ||||
|  | ||||
| # Install common dependencies from vllm common.txt | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
| uv pip install --system -r requirements/common.txt | ||||
|  | ||||
|  | ||||
| # Must be put before installing xformers, so it can install the correct version of xformers. | ||||
| ARG exformer_cuda_arch_list='7.5;8.0+PTX;9.0a' | ||||
| ENV TORCH_CUDA_ARCH_LIST=${exformer_cuda_arch_list} | ||||
|  | ||||
| ARG max_jobs=16 | ||||
| ENV MAX_JOBS=${max_jobs} | ||||
|  | ||||
| RUN echo ${TORCH_CUDA_ARCH_LIST} | ||||
| RUN echo ${MAX_JOBS} | ||||
| RUN pip freeze | grep -E 'ninja' | ||||
|  | ||||
| # Build xformers with cuda and torch nightly/wheel | ||||
| # following official xformers guidance: https://github.com/facebookresearch/xformers#build | ||||
| # sha for https://github.com/facebookresearch/xformers/tree/v0.0.31 | ||||
| ARG XFORMERS_COMMIT=eb0946a363464da96ea40afd1a7f72a907c25497 | ||||
| ENV CCACHE_DIR=/root/.cache/ccache | ||||
|  | ||||
| RUN --mount=type=cache,target=/root/.cache/ccache \ | ||||
|     --mount=type=cache,target=/root/.cache/uv \ | ||||
|     echo 'git clone xformers...' \ | ||||
|     && git clone https://github.com/facebookresearch/xformers.git --recursive \ | ||||
|     && cd xformers \ | ||||
|     && git checkout ${XFORMERS_COMMIT} \ | ||||
|     && git submodule update --init --recursive \ | ||||
|     && echo 'finish git clone xformers...' \ | ||||
|     && rm -rf build \ | ||||
|     && python3 setup.py bdist_wheel --dist-dir=../xformers-dist --verbose \ | ||||
|     && cd .. \ | ||||
|     && rm -rf xformers | ||||
|  | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system xformers-dist/*.whl --verbose | ||||
|  | ||||
| # Build can take a long time, and the torch nightly version fetched from url can be different in next docker stage. | ||||
| # track the nightly torch version used in the build, when we set up runtime environment we can make sure the version is the same | ||||
| RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio' > torch_build_versions.txt | ||||
|  | ||||
| RUN cat torch_build_versions.txt | ||||
| RUN pip freeze | grep -E 'torch|xformers|torchvision|torchaudio' | ||||
|  | ||||
| #################### BASE BUILD IMAGE #################### | ||||
|  | ||||
|  | ||||
| #################### WHEEL BUILD IMAGE #################### | ||||
| # Image used to build vllm wheel | ||||
| FROM base AS build | ||||
| ARG TARGETPLATFORM | ||||
|  | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| COPY . . | ||||
|  | ||||
| RUN python3 use_existing_torch.py | ||||
|  | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -r requirements/build.txt | ||||
|  | ||||
| ARG GIT_REPO_CHECK=0 | ||||
| RUN --mount=type=bind,source=.git,target=.git \ | ||||
|     if [ "$GIT_REPO_CHECK" != "0" ]; then bash tools/check_repo.sh ; fi | ||||
|  | ||||
| # Max jobs used by Ninja to build extensions | ||||
| ARG max_jobs=16 | ||||
| ENV MAX_JOBS=${max_jobs} | ||||
| ARG nvcc_threads=4 | ||||
| ENV NVCC_THREADS=$nvcc_threads | ||||
| ARG torch_cuda_arch_list='8.0;8.6;8.9;9.0' | ||||
| ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} | ||||
|  | ||||
| ARG USE_SCCACHE | ||||
| ARG SCCACHE_BUCKET_NAME=vllm-build-sccache | ||||
| ARG SCCACHE_REGION_NAME=us-west-2 | ||||
| ARG SCCACHE_S3_NO_CREDENTIALS=0 | ||||
|  | ||||
| # if USE_SCCACHE is set, use sccache to speed up compilation | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     --mount=type=bind,source=.git,target=.git \ | ||||
|     if [ "$USE_SCCACHE" = "1" ]; then \ | ||||
|         echo "Installing sccache..." \ | ||||
|         && curl -L -o sccache.tar.gz https://github.com/mozilla/sccache/releases/download/v0.8.1/sccache-v0.8.1-x86_64-unknown-linux-musl.tar.gz \ | ||||
|         && tar -xzf sccache.tar.gz \ | ||||
|         && sudo mv sccache-v0.8.1-x86_64-unknown-linux-musl/sccache /usr/bin/sccache \ | ||||
|         && rm -rf sccache.tar.gz sccache-v0.8.1-x86_64-unknown-linux-musl \ | ||||
|         && export SCCACHE_BUCKET=${SCCACHE_BUCKET_NAME} \ | ||||
|         && export SCCACHE_REGION=${SCCACHE_REGION_NAME} \ | ||||
|         && export SCCACHE_S3_NO_CREDENTIALS=${SCCACHE_S3_NO_CREDENTIALS} \ | ||||
|         && export SCCACHE_IDLE_TIMEOUT=0 \ | ||||
|         && export CMAKE_BUILD_TYPE=Release \ | ||||
|         && export VLLM_DOCKER_BUILD_CONTEXT=1 \ | ||||
|         && sccache --show-stats \ | ||||
|         && python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38 \ | ||||
|         && sccache --show-stats; \ | ||||
|     fi | ||||
|  | ||||
| ARG vllm_target_device="cuda" | ||||
| ENV VLLM_TARGET_DEVICE=${vllm_target_device} | ||||
| ENV CCACHE_DIR=/root/.cache/ccache | ||||
| RUN --mount=type=cache,target=/root/.cache/ccache \ | ||||
|     --mount=type=cache,target=/root/.cache/uv \ | ||||
|     --mount=type=bind,source=.git,target=.git  \ | ||||
|     if [ "$USE_SCCACHE" != "1" ]; then \ | ||||
|         # Clean any existing CMake artifacts | ||||
|         rm -rf .deps && \ | ||||
|         mkdir -p .deps && \ | ||||
|         export VLLM_DOCKER_BUILD_CONTEXT=1 && \ | ||||
|         python3 setup.py bdist_wheel --dist-dir=vllm-dist --py-limited-api=cp38; \ | ||||
|     fi | ||||
|  | ||||
| RUN echo "[DEBUG] Listing  current directory:" && \ | ||||
|     ls -al && \ | ||||
|     echo "[DEBUG] Showing torch_build_versions.txt content:" && \ | ||||
|     cat torch_build_versions.txt | ||||
|  | ||||
| #################### WHEEL BUILD IMAGE #################### | ||||
|  | ||||
|  | ||||
| ################### VLLM INSTALLED IMAGE #################### | ||||
| # Setup clean environment for vLLM for test and api server using ubuntu22.04 with AOT flashinfer | ||||
| FROM ${FINAL_BASE_IMAGE} AS vllm-base | ||||
| USER root | ||||
| # prepare for environment starts | ||||
| WORKDIR /workspace | ||||
|  | ||||
| RUN PYTHON_VERSION_STR=$(echo ${PYTHON_VERSION} | sed 's/\.//g') && \ | ||||
|     echo "export PYTHON_VERSION_STR=${PYTHON_VERSION_STR}" >> /etc/environment | ||||
|  | ||||
| # Install Python and other dependencies if the requested Python version is not already present | ||||
| RUN if ! command -v python3 >/dev/null || ! python3 --version | grep -q "${PYTHON_VERSION}"; then \ | ||||
|       echo "Installing Python ${PYTHON_VERSION}..." && \ | ||||
|       echo 'tzdata tzdata/Areas select America' | debconf-set-selections && \ | ||||
|       echo 'tzdata tzdata/Zones/America select Los_Angeles' | debconf-set-selections && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y ccache software-properties-common git curl sudo && \ | ||||
|       for i in 1 2 3; do \ | ||||
|         add-apt-repository -y ppa:deadsnakes/ppa && break || \ | ||||
|         { echo "Attempt $i failed, retrying in 5s..."; sleep 5; }; \ | ||||
|       done && \ | ||||
|       apt-get update -y && \ | ||||
|       apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python${PYTHON_VERSION}-venv && \ | ||||
|       update-alternatives --install /usr/bin/python3 python3 /usr/bin/python${PYTHON_VERSION} 1 && \ | ||||
|       update-alternatives --set python3 /usr/bin/python${PYTHON_VERSION} && \ | ||||
|       ln -sf /usr/bin/python${PYTHON_VERSION}-config /usr/bin/python3-config && \ | ||||
|       curl -sS https://bootstrap.pypa.io/get-pip.py | python${PYTHON_VERSION}; \ | ||||
|    else \ | ||||
|       echo "Python ${PYTHON_VERSION} already present, skipping setup."; \ | ||||
|    fi \ | ||||
|    && python3 --version && python3 -m pip --version | ||||
|  | ||||
|  | ||||
| # Get the torch versions and whls used in previous stages for consistency | ||||
| COPY --from=base /workspace/torch_build_versions.txt ./torch_build_versions.txt | ||||
| COPY --from=base /workspace/xformers-dist /wheels/xformers | ||||
| COPY --from=build /workspace/vllm-dist /wheels/vllm | ||||
| RUN echo "[DEBUG] Listing current directory before torch install step:" && \ | ||||
|     ls -al && \ | ||||
|     echo "[DEBUG] Showing torch_build_versions.txt content:" && \ | ||||
|     cat torch_build_versions.txt | ||||
|  | ||||
| # Workaround for https://github.com/openai/triton/issues/2507 and | ||||
| # https://github.com/pytorch/pytorch/issues/107960 -- hopefully | ||||
| # this won't be needed for future versions of this docker image | ||||
| # or future versions of triton. | ||||
| RUN ldconfig /usr/local/cuda-$(echo $CUDA_VERSION | cut -d. -f1,2)/compat/ | ||||
|  | ||||
|  | ||||
| # Install uv for faster pip installs if it is not already installed | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if ! python3 -m uv --version > /dev/null 2>&1; then \ | ||||
|         python3 -m pip install uv==0.8.4; \ | ||||
|     fi | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| # Default mount file as placeholder, this just avoids the mount error | ||||
| ARG TORCH_WHEELS_PATH="./requirements" | ||||
| # Install torch, torchaudio and torchvision | ||||
| # if TORCH_WHEELS_PATH is default "./requirements", it will pull the nightly versions using pip using torch_build_versions.txt | ||||
| # otherwise, it will use the whls from TORCH_WHEELS_PATH from the host machine | ||||
| RUN --mount=type=bind,source=${TORCH_WHEELS_PATH},target=/dist \ | ||||
|     --mount=type=cache,target=/root/.cache/uv \ | ||||
|     if [ -n "$TORCH_WHEELS_PATH" ] && [ "$TORCH_WHEELS_PATH" != "./requirements" ] && [ -d "/dist" ] && ls /dist/torch*.whl >/dev/null 2>&1; then \ | ||||
|         torch_whl=$(find /dist -maxdepth 1 -name 'torch-*.whl' -print -quit); \ | ||||
|         vision_whl=$(find /dist/vision -name 'torchvision*.whl' | head -n1 | xargs); \ | ||||
|         audio_whl=$(find /dist/audio -name 'torchaudio*.whl' | head -n1 | xargs); \ | ||||
|         echo "[INFO] Use wheels to build : '${torch_whl}' '${audio_whl}' '${vision_whl}'"; \ | ||||
|         uv pip install --system "${torch_whl}[opt-einsum]"; \ | ||||
|         uv pip install --system "${vision_whl}"; \ | ||||
|         uv pip install --system "${audio_whl}"; \ | ||||
|     else \ | ||||
|         echo "[INFO] Installing torch versions from torch_build_versions.txt"; \ | ||||
|         uv pip install --system $(cat torch_build_versions.txt | xargs) --index-url https://download.pytorch.org/whl/nightly/cu128; \ | ||||
|     fi | ||||
|  | ||||
| # Install the vllm wheel from previous stage | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system /wheels/vllm/*.whl --verbose | ||||
|  | ||||
| # Install xformers wheel from previous stage | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system /wheels/xformers/*.whl --verbose | ||||
|  | ||||
|  | ||||
| # Build flashinfer from source. | ||||
| ARG torch_cuda_arch_list='8.0;8.9;9.0a' | ||||
| # install package for build flashinfer | ||||
| # see issue: https://github.com/flashinfer-ai/flashinfer/issues/738 | ||||
|  | ||||
| RUN pip install build==1.3.0 | ||||
| RUN pip freeze | grep -E 'setuptools|packaging|build' | ||||
|  | ||||
| ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list} | ||||
| # Build flashinfer for torch nightly from source around 10 mins | ||||
| ARG FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" | ||||
| # Keep this in sync with https://github.com/vllm-project/vllm/blob/main/requirements/cuda.txt | ||||
| ARG FLASHINFER_GIT_REF="v0.2.14.post1" | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     git clone --depth 1 --recursive --shallow-submodules \ | ||||
|         --branch ${FLASHINFER_GIT_REF} \ | ||||
|         ${FLASHINFER_GIT_REPO} flashinfer \ | ||||
|     && echo "Building FlashInfer with AOT for arches: ${torch_cuda_arch_list}" \ | ||||
|     && cd flashinfer \ | ||||
|     && python3 -m flashinfer.aot \ | ||||
|     && python3 -m build --no-isolation --wheel --outdir ../wheels/flashinfer \ | ||||
|     && cd .. \ | ||||
|     && rm -rf flashinfer | ||||
|  | ||||
| # install flashinfer python | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system wheels/flashinfer/*.whl --verbose | ||||
|  | ||||
| # Logging to confirm the torch versions | ||||
| RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer' | ||||
| RUN uv pip freeze | grep -i '^torch\|^torchvision\|^torchaudio\|^xformers\|^vllm\|^flashinfer' > build_summary.txt | ||||
| ################### VLLM INSTALLED IMAGE #################### | ||||
|  | ||||
|  | ||||
| #################### UNITTEST IMAGE ############################# | ||||
| FROM vllm-base as test | ||||
|  | ||||
| ENV UV_HTTP_TIMEOUT=500 | ||||
| ENV UV_INDEX_STRATEGY="unsafe-best-match" | ||||
| # Use copy mode to avoid hardlink failures with Docker cache mounts | ||||
| ENV UV_LINK_MODE=copy | ||||
|  | ||||
| COPY tests/ tests/ | ||||
| COPY examples examples | ||||
| COPY benchmarks benchmarks | ||||
| COPY ./vllm/collect_env.py . | ||||
| COPY requirements/common.txt requirements/common.txt | ||||
| COPY use_existing_torch.py use_existing_torch.py | ||||
| COPY pyproject.toml pyproject.toml | ||||
| # Install build and runtime dependencies without stable torch version | ||||
| COPY requirements/nightly_torch_test.txt requirements/nightly_torch_test.txt | ||||
|  | ||||
| RUN python3 use_existing_torch.py | ||||
|  | ||||
| # install packages | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -r requirements/common.txt | ||||
| # enable fast downloads from hf (for testing) | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system hf_transfer | ||||
| ENV HF_HUB_ENABLE_HF_TRANSFER 1 | ||||
|  | ||||
| # install development dependencies (for testing) | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -e tests/vllm_test_utils | ||||
|  | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system -r requirements/nightly_torch_test.txt | ||||
|  | ||||
| # Workaround for #17068 | ||||
| # pinned commit for v2.2.4 | ||||
| RUN --mount=type=cache,target=/root/.cache/uv \ | ||||
|     uv pip install --system --no-build-isolation "git+https://github.com/state-spaces/mamba@95d8aba8a8c75aedcaa6143713b11e745e7cd0d9#egg=mamba-ssm" | ||||
|  | ||||
| # Logging to confirm the torch versions | ||||
| RUN pip freeze | grep -E 'torch|xformers|vllm|flashinfer' | ||||
|  | ||||
| # Logging to confirm all the packages are installed | ||||
| RUN pip freeze | ||||
|  | ||||
| #################### UNITTEST IMAGE ############################# | ||||
|  | ||||
| #################### EXPORT STAGE #################### | ||||
| FROM scratch as export-wheels | ||||
|  | ||||
| # Just copy the wheels we prepared in previous stages | ||||
| COPY --from=base /workspace/xformers-dist /wheels/xformers | ||||
| COPY --from=build /workspace/vllm-dist /wheels/vllm | ||||
| COPY --from=vllm-base /workspace/build_summary.txt /wheels/build_summary.txt | ||||
| COPY --from=vllm-base /workspace/wheels/flashinfer /wheels/flashinfer-python | ||||
							
								
								
									
										24
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										24
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							| @ -1,24 +0,0 @@ | ||||
| version: 2 | ||||
| updates: | ||||
|   # Update to the latest transformers version with dependabot | ||||
|   - package-ecosystem: "pip" | ||||
|     directory: "/.ci/docker/ci_commit_pins" | ||||
|     schedule: | ||||
|       interval: "daily" | ||||
|     target-branch: "main" | ||||
|     allow: | ||||
|       - dependency-name: "transformers" | ||||
|     ignore: | ||||
|       - dependency-name: "*" | ||||
|         update-types: ["version-update:semver-patch"] | ||||
|     commit-message: | ||||
|       prefix: "[Dependabot] Update" | ||||
|       include: "scope" | ||||
|     labels: | ||||
|       - "dependencies" | ||||
|       - "open source" | ||||
|       - "python" | ||||
|       - "topic: not user facing" | ||||
|       - "module: ci" | ||||
|       - "module: inductor" | ||||
|       - "ciflow/inductor" | ||||
							
								
								
									
										2
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/pytorch-probot.yml
									
									
									
									
										vendored
									
									
								
							| @ -22,12 +22,10 @@ ciflow_push_tags: | ||||
| - ciflow/rocm | ||||
| - ciflow/rocm-mi300 | ||||
| - ciflow/s390 | ||||
| - ciflow/riscv64 | ||||
| - ciflow/slow | ||||
| - ciflow/trunk | ||||
| - ciflow/unstable | ||||
| - ciflow/xpu | ||||
| - ciflow/vllm | ||||
| - ciflow/torchbench | ||||
| - ciflow/op-benchmark | ||||
| - ciflow/pull | ||||
|  | ||||
| @ -28,7 +28,7 @@ pyyaml==6.0.2 | ||||
| scipy==1.12.0 | ||||
| setuptools==72.1.0 | ||||
| sympy==1.13.3 | ||||
| tlparse==0.4.0 | ||||
| tlparse==0.3.30 | ||||
| tensorboard==2.13.0 | ||||
| typing-extensions==4.12.2 | ||||
| unittest-xml-reporting<=3.2.0,>=2.0.0 | ||||
|  | ||||
							
								
								
									
										31
									
								
								.github/scripts/amd/package_triton_wheel.sh
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										31
									
								
								.github/scripts/amd/package_triton_wheel.sh
									
									
									
									
										vendored
									
									
								
							| @ -1,4 +1,3 @@ | ||||
| #!/bin/bash | ||||
| set -ex | ||||
|  | ||||
| # If ROCM_HOME isn't available, use ROCM_PATH if set or /opt/rocm | ||||
| @ -51,15 +50,29 @@ do | ||||
|     cp $lib $TRITON_ROCM_DIR/lib/ | ||||
| done | ||||
|  | ||||
| # Required ROCm libraries | ||||
| if [[ "${MAJOR_VERSION}" == "6" ]]; then | ||||
|     libamdhip="libamdhip64.so.6" | ||||
| else | ||||
|     libamdhip="libamdhip64.so.5" | ||||
| fi | ||||
|  | ||||
| # Required ROCm libraries - ROCm 6.0 | ||||
| ROCM_SO=( | ||||
|     "libamdhip64.so" | ||||
|     "libhsa-runtime64.so" | ||||
|     "libdrm.so" | ||||
|     "libdrm_amdgpu.so" | ||||
|     "libamd_comgr.so" | ||||
|     "librocprofiler-register.so" | ||||
|     "${libamdhip}" | ||||
|     "libhsa-runtime64.so.1" | ||||
|     "libdrm.so.2" | ||||
|     "libdrm_amdgpu.so.1" | ||||
| ) | ||||
| if [[ $ROCM_INT -ge 60400 ]]; then | ||||
|     ROCM_SO+=("libamd_comgr.so.3") | ||||
| else | ||||
|     ROCM_SO+=("libamd_comgr.so.2") | ||||
| fi | ||||
|  | ||||
| if [[ $ROCM_INT -ge 60100 ]]; then | ||||
|     ROCM_SO+=("librocprofiler-register.so.0") | ||||
| fi | ||||
|  | ||||
| for lib in "${ROCM_SO[@]}" | ||||
| do | ||||
| @ -81,6 +94,10 @@ do | ||||
|     fi | ||||
|  | ||||
|     cp $file_path $TRITON_ROCM_DIR/lib | ||||
|     # When running locally (not building a wheel), we need to satisfy shared-object requests that don't specify a version | ||||
|     LINKNAME=$(echo $lib | sed -e 's/\.so.*/.so/g') | ||||
|     ln -sf $lib $TRITON_ROCM_DIR/lib/$LINKNAME | ||||
|  | ||||
| done | ||||
|  | ||||
| # Copy Include Files | ||||
|  | ||||
							
								
								
									
										16
									
								
								.github/scripts/amd/patch_triton_wheel.sh
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/scripts/amd/patch_triton_wheel.sh
									
									
									
									
										vendored
									
									
								
							| @ -19,13 +19,15 @@ replace_needed_sofiles() { | ||||
|     find $1 -name '*.so*' -o -name 'ld.lld' | while read sofile; do | ||||
|         origname=$2 | ||||
|         patchedname=$3 | ||||
|         set +e | ||||
|         origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*") | ||||
|         ERRCODE=$? | ||||
|         set -e | ||||
|         if [ "$ERRCODE" -eq "0" ]; then | ||||
|             echo "patching $sofile entry $origname to $patchedname" | ||||
|             $PATCHELF_BIN --replace-needed $origname $patchedname $sofile | ||||
|         if [[ "$origname" != "$patchedname" ]]; then | ||||
|             set +e | ||||
|             origname=$($PATCHELF_BIN --print-needed $sofile | grep "$origname.*") | ||||
|             ERRCODE=$? | ||||
|             set -e | ||||
|             if [ "$ERRCODE" -eq "0" ]; then | ||||
|                 echo "patching $sofile entry $origname to $patchedname" | ||||
|                 $PATCHELF_BIN --replace-needed $origname $patchedname $sofile | ||||
|             fi | ||||
|         fi | ||||
|     done | ||||
| } | ||||
|  | ||||
							
								
								
									
										118
									
								
								.github/scripts/generate_binary_build_matrix.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										118
									
								
								.github/scripts/generate_binary_build_matrix.py
									
									
									
									
										vendored
									
									
								
							| @ -16,19 +16,17 @@ from typing import Optional | ||||
|  | ||||
|  | ||||
| # NOTE: Please also update the CUDA sources in `PIP_SOURCES` in tools/nightly.py when changing this | ||||
| CUDA_ARCHES = ["12.6", "12.8", "12.9", "13.0"] | ||||
| CUDA_ARCHES = ["12.6", "12.8", "12.9"] | ||||
| CUDA_STABLE = "12.8" | ||||
| CUDA_ARCHES_FULL_VERSION = { | ||||
|     "12.6": "12.6.3", | ||||
|     "12.8": "12.8.1", | ||||
|     "12.9": "12.9.1", | ||||
|     "13.0": "13.0.0", | ||||
| } | ||||
| CUDA_ARCHES_CUDNN_VERSION = { | ||||
|     "12.6": "9", | ||||
|     "12.8": "9", | ||||
|     "12.9": "9", | ||||
|     "13.0": "9", | ||||
| } | ||||
|  | ||||
| # NOTE: Please also update the ROCm sources in `PIP_SOURCES` in tools/nightly.py when changing this | ||||
| @ -40,7 +38,7 @@ CPU_AARCH64_ARCH = ["cpu-aarch64"] | ||||
|  | ||||
| CPU_S390X_ARCH = ["cpu-s390x"] | ||||
|  | ||||
| CUDA_AARCH64_ARCHES = ["12.9-aarch64", "13.0-aarch64"] | ||||
| CUDA_AARCH64_ARCHES = ["12.9-aarch64"] | ||||
|  | ||||
|  | ||||
| PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
| @ -56,7 +54,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusparse-cu12==12.5.4.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.6.77; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.6.85; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.11.1.6; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
| @ -73,7 +71,7 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusparse-cu12==12.5.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.8.90; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.8.93; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.13.1.3; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
| @ -90,49 +88,32 @@ PYTORCH_EXTRA_INSTALL_REQUIREMENTS = { | ||||
|         "nvidia-cusparse-cu12==12.5.10.65; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu12==0.7.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu12==2.27.5; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.20; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu12==3.3.9; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx-cu12==12.9.79; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink-cu12==12.9.86; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile-cu12==1.14.1.1; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "13.0": ( | ||||
|         "nvidia-cuda-nvrtc==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-runtime==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cuda-cupti==13.0.48; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cudnn-cu13==9.12.0.46; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cublas==13.0.0.19; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufft==12.0.0.15; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-curand==10.4.0.35; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusolver==12.0.3.29; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparse==12.6.2.49; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cusparselt-cu13==0.8.0; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nccl-cu13==2.27.7; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvshmem-cu13==3.3.24; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvtx==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-nvjitlink==13.0.39; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "nvidia-cufile==1.15.0.42; platform_system == 'Linux' and platform_machine == 'x86_64'" | ||||
|     ), | ||||
|     "xpu": ( | ||||
|         "intel-cmplr-lib-rt==2025.2.1 | " | ||||
|         "intel-cmplr-lib-ur==2025.2.1 | " | ||||
|         "intel-cmplr-lic-rt==2025.2.1 | " | ||||
|         "intel-sycl-rt==2025.2.1 | " | ||||
|         "oneccl-devel==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "oneccl==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "impi-rt==2021.16.1; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "onemkl-sycl-blas==2025.2.0 | " | ||||
|         "onemkl-sycl-dft==2025.2.0 | " | ||||
|         "onemkl-sycl-lapack==2025.2.0 | " | ||||
|         "onemkl-sycl-rng==2025.2.0 | " | ||||
|         "onemkl-sycl-sparse==2025.2.0 | " | ||||
|         "dpcpp-cpp-rt==2025.2.1 | " | ||||
|         "intel-opencl-rt==2025.2.1 | " | ||||
|         "mkl==2025.2.0 | " | ||||
|         "intel-openmp==2025.2.1 | " | ||||
|         "tbb==2022.2.0 | " | ||||
|         "tcmlib==1.4.0 | " | ||||
|         "umf==0.11.0 | " | ||||
|         "intel-pti==0.13.1" | ||||
|         "intel-cmplr-lib-rt==2025.1.1 | " | ||||
|         "intel-cmplr-lib-ur==2025.1.1 | " | ||||
|         "intel-cmplr-lic-rt==2025.1.1 | " | ||||
|         "intel-sycl-rt==2025.1.1 | " | ||||
|         "oneccl-devel==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "oneccl==2021.15.2; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "impi-rt==2021.15.0; platform_system == 'Linux' and platform_machine == 'x86_64' | " | ||||
|         "onemkl-sycl-blas==2025.1.0 | " | ||||
|         "onemkl-sycl-dft==2025.1.0 | " | ||||
|         "onemkl-sycl-lapack==2025.1.0 | " | ||||
|         "onemkl-sycl-rng==2025.1.0 | " | ||||
|         "onemkl-sycl-sparse==2025.1.0 | " | ||||
|         "dpcpp-cpp-rt==2025.1.1 | " | ||||
|         "intel-opencl-rt==2025.1.1 | " | ||||
|         "mkl==2025.1.0 | " | ||||
|         "intel-openmp==2025.1.1 | " | ||||
|         "tbb==2022.1.0 | " | ||||
|         "tcmlib==1.3.0 | " | ||||
|         "umf==0.10.0 | " | ||||
|         "intel-pti==0.12.3" | ||||
|     ), | ||||
| } | ||||
|  | ||||
| @ -143,7 +124,9 @@ def get_nccl_wheel_version(arch_version: str) -> str: | ||||
|     requirements = map( | ||||
|         str.strip, re.split("[;|]", PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version]) | ||||
|     ) | ||||
|     return next(x for x in requirements if x.startswith("nvidia-nccl")).split("==")[1] | ||||
|     return next(x for x in requirements if x.startswith("nvidia-nccl-cu")).split("==")[ | ||||
|         1 | ||||
|     ] | ||||
|  | ||||
|  | ||||
| def read_nccl_pin(arch_version: str) -> str: | ||||
| @ -210,7 +193,7 @@ LIBTORCH_CONTAINER_IMAGES: dict[str, str] = { | ||||
|     "cpu": "libtorch-cxx11-builder:cpu", | ||||
| } | ||||
|  | ||||
| FULL_PYTHON_VERSIONS = ["3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"] | ||||
| FULL_PYTHON_VERSIONS = ["3.9", "3.10", "3.11", "3.12", "3.13", "3.13t", "3.14", "3.14t"] | ||||
|  | ||||
|  | ||||
| def translate_desired_cuda(gpu_arch_type: str, gpu_arch_version: str) -> str: | ||||
| @ -240,8 +223,6 @@ def generate_libtorch_matrix( | ||||
|         if os == "linux": | ||||
|             arches += CUDA_ARCHES | ||||
|             arches += ROCM_ARCHES | ||||
|             if "13.0" in arches: | ||||
|                 arches.remove("13.0") | ||||
|         elif os == "windows": | ||||
|             arches += CUDA_ARCHES | ||||
|     if libtorch_variants is None: | ||||
| @ -330,20 +311,19 @@ def generate_wheels_matrix( | ||||
|                 else arch_version | ||||
|             ) | ||||
|  | ||||
|             # TODO: Enable python 3.14 for rest | ||||
|             if os not in [ | ||||
|                 "linux", | ||||
|                 "linux-aarch64", | ||||
|                 "linux-s390x", | ||||
|                 "macos-arm64", | ||||
|                 "windows", | ||||
|             ] and (python_version == "3.14" or python_version == "3.14t"): | ||||
|             # TODO: Enable python 3.13t on cpu-s390x | ||||
|             if gpu_arch_type == "cpu-s390x" and python_version == "3.13t": | ||||
|                 continue | ||||
|             # TODO: Enable python 3.14 on non linux OSes | ||||
|             if os != "linux" and ( | ||||
|                 python_version == "3.14" or python_version == "3.14t" | ||||
|             ): | ||||
|                 continue | ||||
|  | ||||
|             # cuda linux wheels require PYTORCH_EXTRA_INSTALL_REQUIREMENTS to install | ||||
|  | ||||
|             if ( | ||||
|                 arch_version in ["13.0", "12.9", "12.8", "12.6"] | ||||
|                 arch_version in ["12.9", "12.8", "12.6"] | ||||
|                 and os == "linux" | ||||
|                 or arch_version in CUDA_AARCH64_ARCHES | ||||
|             ): | ||||
| @ -376,6 +356,29 @@ def generate_wheels_matrix( | ||||
|                         ),  # include special case for aarch64 build, remove the -aarch64 postfix | ||||
|                     } | ||||
|                 ) | ||||
|                 # Special build for use on Colab: Python 3.11 with the stable CUDA version (CUDA_STABLE) | ||||
|                 if python_version == "3.11" and arch_version == CUDA_STABLE: | ||||
|                     ret.append( | ||||
|                         { | ||||
|                             "python_version": python_version, | ||||
|                             "gpu_arch_type": gpu_arch_type, | ||||
|                             "gpu_arch_version": gpu_arch_version, | ||||
|                             "desired_cuda": translate_desired_cuda( | ||||
|                                 gpu_arch_type, gpu_arch_version | ||||
|                             ), | ||||
|                             "container_image": WHEEL_CONTAINER_IMAGES[ | ||||
|                                 arch_version | ||||
|                             ].split(":")[0], | ||||
|                             "container_image_tag_prefix": WHEEL_CONTAINER_IMAGES[ | ||||
|                                 arch_version | ||||
|                             ].split(":")[1], | ||||
|                             "package_type": package_type, | ||||
|                             "pytorch_extra_install_requirements": "", | ||||
|                             "build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-full".replace(  # noqa: B950 | ||||
|                                 ".", "_" | ||||
|                             ), | ||||
|                         } | ||||
|                     ) | ||||
|             else: | ||||
|                 ret.append( | ||||
|                     { | ||||
| @ -406,7 +409,6 @@ def generate_wheels_matrix( | ||||
|     return ret | ||||
|  | ||||
|  | ||||
| validate_nccl_dep_consistency("13.0") | ||||
| validate_nccl_dep_consistency("12.9") | ||||
| validate_nccl_dep_consistency("12.8") | ||||
| validate_nccl_dep_consistency("12.6") | ||||
|  | ||||
							
								
								
									
										4
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/scripts/generate_ci_workflows.py
									
									
									
									
										vendored
									
									
								
							| @ -22,7 +22,7 @@ LABEL_CIFLOW_BINARIES = "ciflow/binaries" | ||||
| LABEL_CIFLOW_PERIODIC = "ciflow/periodic" | ||||
| LABEL_CIFLOW_BINARIES_LIBTORCH = "ciflow/binaries_libtorch" | ||||
| LABEL_CIFLOW_BINARIES_WHEEL = "ciflow/binaries_wheel" | ||||
| LABEL_CIFLOW_ROCM = "ciflow/rocm-mi300" | ||||
| LABEL_CIFLOW_ROCM = "ciflow/rocm" | ||||
|  | ||||
|  | ||||
| @dataclass | ||||
| @ -139,6 +139,8 @@ ROCM_SMOKE_WORKFLOWS = [ | ||||
|         ), | ||||
|         ciflow_config=CIFlowConfig( | ||||
|             labels={ | ||||
|                 LABEL_CIFLOW_BINARIES, | ||||
|                 LABEL_CIFLOW_BINARIES_WHEEL, | ||||
|                 LABEL_CIFLOW_ROCM, | ||||
|             }, | ||||
|             isolated_workflow=True, | ||||
|  | ||||
							
								
								
									
										182
									
								
								.github/scripts/test_trymerge.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										182
									
								
								.github/scripts/test_trymerge.py
									
									
									
									
										vendored
									
									
								
							| @ -27,7 +27,6 @@ from trymerge import ( | ||||
|     get_drci_classifications, | ||||
|     gh_get_team_members, | ||||
|     GitHubPR, | ||||
|     iter_issue_timeline_until_comment, | ||||
|     JobCheckState, | ||||
|     main as trymerge_main, | ||||
|     MandatoryChecksMissingError, | ||||
| @ -35,8 +34,6 @@ from trymerge import ( | ||||
|     RE_GHSTACK_DESC, | ||||
|     read_merge_rules, | ||||
|     remove_job_name_suffix, | ||||
|     sha_from_committed_event, | ||||
|     sha_from_force_push_after, | ||||
|     validate_revert, | ||||
| ) | ||||
|  | ||||
| @ -127,7 +124,7 @@ def mock_parse_args(revert: bool = False, force: bool = False) -> Any: | ||||
|             self.force = force | ||||
|             self.pr_num = 76123 | ||||
|             self.dry_run = True | ||||
|             self.comment_id = 12345  # Set to non-zero value | ||||
|             self.comment_id = 0 | ||||
|             self.reason = "this is for testing" | ||||
|             self.ignore_current = False | ||||
|             self.check_mergeability = False | ||||
| @ -155,9 +152,9 @@ def mock_revert( | ||||
| def mock_merge( | ||||
|     pr: GitHubPR, | ||||
|     repo: GitRepo, | ||||
|     comment_id: int, | ||||
|     dry_run: bool = False, | ||||
|     skip_mandatory_checks: bool = False, | ||||
|     comment_id: Optional[int] = None, | ||||
|     timeout_minutes: int = 400, | ||||
|     stale_pr_days: int = 3, | ||||
|     ignore_current: bool = False, | ||||
| @ -473,9 +470,9 @@ class TestTryMerge(TestCase): | ||||
|         mock_merge.assert_called_once_with( | ||||
|             mock.ANY, | ||||
|             mock.ANY, | ||||
|             comment_id=mock.ANY, | ||||
|             dry_run=mock.ANY, | ||||
|             skip_mandatory_checks=True, | ||||
|             comment_id=mock.ANY, | ||||
|             ignore_current=False, | ||||
|         ) | ||||
|  | ||||
| @ -488,9 +485,9 @@ class TestTryMerge(TestCase): | ||||
|         mock_merge.assert_called_once_with( | ||||
|             mock.ANY, | ||||
|             mock.ANY, | ||||
|             comment_id=mock.ANY, | ||||
|             dry_run=mock.ANY, | ||||
|             skip_mandatory_checks=False, | ||||
|             comment_id=mock.ANY, | ||||
|             ignore_current=False, | ||||
|         ) | ||||
|  | ||||
| @ -1141,176 +1138,5 @@ Pull Request resolved: https://github.com/pytorch/pytorch/pull/154394""" | ||||
|         ) | ||||
|  | ||||
|  | ||||
| @mock.patch("trymerge.gh_graphql", side_effect=mocked_gh_graphql) | ||||
| @mock.patch("trymerge.gh_fetch_merge_base", return_value="") | ||||
| @mock.patch( | ||||
|     "trymerge.get_drci_classifications", side_effect=mocked_drci_classifications | ||||
| ) | ||||
| class TestTimelineFunctions(TestCase): | ||||
|     """Tests for the new timeline-related functions""" | ||||
|  | ||||
|     def test_sha_from_committed_event(self, *args: Any) -> None: | ||||
|         """Test extracting SHA from committed event""" | ||||
|         # Based on actual GitHub API format - committed events have "sha" at top level | ||||
|         event = { | ||||
|             "event": "committed", | ||||
|             "sha": "fb21ce932ded6670c918804a0d9151b773770a7c", | ||||
|         } | ||||
|         self.assertEqual( | ||||
|             sha_from_committed_event(event), "fb21ce932ded6670c918804a0d9151b773770a7c" | ||||
|         ) | ||||
|  | ||||
|         # Test with missing SHA | ||||
|         event_no_sha = {"event": "committed"} | ||||
|         self.assertIsNone(sha_from_committed_event(event_no_sha)) | ||||
|  | ||||
|     def test_sha_from_force_push_after(self, *args: Any) -> None: | ||||
|         """Test extracting SHA from force push event""" | ||||
|         # NOTE: The current function doesn't handle the actual GitHub API format | ||||
|         # Real force push events have "commit_id" at top level, but this function | ||||
|         # looks for "after", "after_commit", "after_sha", or "head_sha" fields | ||||
|  | ||||
|         # Test with the legacy format the current function handles | ||||
|         event_legacy = { | ||||
|             "event": "head_ref_force_pushed", | ||||
|             "after": {"sha": "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e"}, | ||||
|         } | ||||
|         self.assertEqual( | ||||
|             sha_from_force_push_after(event_legacy), | ||||
|             "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e", | ||||
|         ) | ||||
|  | ||||
|         # Test with current GitHub API format: "commit_id" at top level (expected to be returned as the SHA) | ||||
|         event_real_api = { | ||||
|             "event": "head_ref_force_pushed", | ||||
|             "commit_id": "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e", | ||||
|         } | ||||
|         self.assertEqual( | ||||
|             sha_from_force_push_after(event_real_api), | ||||
|             "ef22bcbc54bb0f787e1e4ffd3d83df18fc407f5e", | ||||
|         )  # the top-level commit_id is expected to be returned as the SHA | ||||
|  | ||||
|         # Test with missing SHA | ||||
|         event_no_sha = {"event": "head_ref_force_pushed"} | ||||
|         self.assertIsNone(sha_from_force_push_after(event_no_sha)) | ||||
|  | ||||
|     @mock.patch("trymerge.gh_fetch_json_list") | ||||
|     def test_iter_issue_timeline_until_comment( | ||||
|         self, mock_gh_fetch_json_list: Any, *args: Any | ||||
|     ) -> None: | ||||
|         """Test timeline iteration until target comment""" | ||||
|         # Mock timeline data based on actual GitHub API format | ||||
|         timeline_data = [ | ||||
|             {"event": "commented", "id": 100, "body": "first comment"}, | ||||
|             {"event": "committed", "sha": "fb21ce932ded6670c918804a0d9151b773770a7c"}, | ||||
|             {"event": "commented", "id": 200, "body": "target comment"}, | ||||
|             {"event": "commented", "id": 300, "body": "after target"}, | ||||
|         ] | ||||
|         mock_gh_fetch_json_list.return_value = timeline_data | ||||
|  | ||||
|         # Test iteration stops at target comment | ||||
|         events = list(iter_issue_timeline_until_comment("pytorch", "pytorch", 123, 200)) | ||||
|         self.assertEqual(len(events), 3)  # Should stop at target comment | ||||
|         self.assertEqual(events[0]["event"], "commented") | ||||
|         self.assertEqual(events[0]["id"], 100) | ||||
|         self.assertEqual(events[1]["event"], "committed") | ||||
|         self.assertEqual(events[1]["sha"], "fb21ce932ded6670c918804a0d9151b773770a7c") | ||||
|         self.assertEqual(events[2]["event"], "commented") | ||||
|         self.assertEqual(events[2]["id"], 200) | ||||
|  | ||||
|     @mock.patch("trymerge.gh_fetch_json_list") | ||||
|     def test_iter_issue_timeline_until_comment_not_found( | ||||
|         self, mock_gh_fetch_json_list: Any, *args: Any | ||||
|     ) -> None: | ||||
|         """Test timeline iteration when target comment is not found""" | ||||
|         # Mock empty timeline | ||||
|         mock_gh_fetch_json_list.return_value = [] | ||||
|  | ||||
|         events = list(iter_issue_timeline_until_comment("pytorch", "pytorch", 123, 999)) | ||||
|         self.assertEqual(len(events), 0) | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_commit_after_comment( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         """Test get_commit_sha_at_comment returns correct SHA after comment""" | ||||
|         mock_iter_timeline.return_value = [ | ||||
|             {"event": "committed", "sha": "commit1"}, | ||||
|             {"event": "committed", "sha": "commit2"}, | ||||
|             {"event": "commented", "id": 100}, | ||||
|             {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}}, | ||||
|         ] | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(100) | ||||
|         self.assertEqual(sha, "commit2") | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_force_push_before_comment( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         mock_iter_timeline.return_value = [ | ||||
|             {"event": "committed", "sha": "commit1"}, | ||||
|             {"event": "committed", "sha": "commit2"}, | ||||
|             {"event": "head_ref_force_pushed", "commit_id": "commit3"}, | ||||
|             {"event": "commented", "id": 100}, | ||||
|         ] | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(100) | ||||
|         self.assertEqual(sha, "commit3") | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_force_push_before_comment_legacy_mode( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         mock_iter_timeline.return_value = [ | ||||
|             {"event": "committed", "sha": "commit1"}, | ||||
|             {"event": "committed", "sha": "commit2"}, | ||||
|             {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}}, | ||||
|             {"event": "commented", "id": 100}, | ||||
|         ] | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(100) | ||||
|         self.assertEqual(sha, "commit3") | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_multiple_comments( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         mock_iter_timeline.return_value = [ | ||||
|             {"event": "committed", "sha": "commit1"}, | ||||
|             {"event": "commented", "id": 100}, | ||||
|             {"event": "committed", "sha": "commit2"}, | ||||
|             {"event": "commented", "id": 200}, | ||||
|             {"event": "head_ref_force_pushed", "after": {"sha": "commit3"}}, | ||||
|             {"event": "commented", "id": 300}, | ||||
|         ] | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(200) | ||||
|         self.assertEqual(sha, "commit2") | ||||
|         sha = pr.get_commit_sha_at_comment(300) | ||||
|         self.assertEqual(sha, "commit3") | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_no_events( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         mock_iter_timeline.return_value = [ | ||||
|             {"event": "commented", "id": 100}, | ||||
|             {"event": "labeled", "label": {"name": "test"}}, | ||||
|         ] | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(100) | ||||
|         self.assertIsNone(sha) | ||||
|  | ||||
|     @mock.patch("trymerge.iter_issue_timeline_until_comment") | ||||
|     def test_get_commit_sha_at_comment_exception( | ||||
|         self, mock_iter_timeline: Any, *args: Any | ||||
|     ) -> None: | ||||
|         mock_iter_timeline.side_effect = Exception("API error") | ||||
|         pr = GitHubPR("pytorch", "pytorch", 77700) | ||||
|         sha = pr.get_commit_sha_at_comment(100) | ||||
|         self.assertIsNone(sha) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
|  | ||||
							
								
								
									
										197
									
								
								.github/scripts/trymerge.py
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										197
									
								
								.github/scripts/trymerge.py
									
									
									
									
										vendored
									
									
								
							| @ -450,63 +450,6 @@ HAS_NO_CONNECTED_DIFF_TITLE = ( | ||||
| IGNORABLE_FAILED_CHECKS_THESHOLD = 10 | ||||
|  | ||||
|  | ||||
| def iter_issue_timeline_until_comment( | ||||
|     org: str, repo: str, issue_number: int, target_comment_id: int, max_pages: int = 200 | ||||
| ) -> Any: | ||||
|     """ | ||||
|     Yield timeline entries in order until (and including) the entry whose id == target_comment_id | ||||
|     for a 'commented' event. Stops once the target comment is encountered. | ||||
|     """ | ||||
|     page = 1 | ||||
|  | ||||
|     while page <= max_pages: | ||||
|         url = ( | ||||
|             f"https://api.github.com/repos/{org}/{repo}/issues/{issue_number}/timeline" | ||||
|         ) | ||||
|         params = {"per_page": 100, "page": page} | ||||
|  | ||||
|         batch = gh_fetch_json_list(url, params) | ||||
|  | ||||
|         if not batch: | ||||
|             return | ||||
|         for ev in batch: | ||||
|             # The target is the issue comment row with event == "commented" and id == issue_comment_id | ||||
|             if ev.get("event") == "commented" and ev.get("id") == target_comment_id: | ||||
|                 yield ev  # nothing in the timeline after this matters, so stop early | ||||
|                 return | ||||
|             yield ev | ||||
|         if len(batch) < 100: | ||||
|             return | ||||
|         page += 1 | ||||
|  | ||||
|     # If we got here without finding the comment, then we either hit a bug or some github PR | ||||
|     # has a _really_ long timeline. | ||||
|     # The max # of pages found on any pytorch/pytorch PR at the time of this change was 41 | ||||
|     raise RuntimeError( | ||||
|         f"Could not find a merge commit in the first {max_pages} pages of the timeline at url {url}." | ||||
|         f"This is most likely a bug, please report it to the @pytorch/pytorch-dev-infra team." | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def sha_from_committed_event(ev: dict[str, Any]) -> Optional[str]: | ||||
|     """Extract SHA from committed event in timeline""" | ||||
|     return ev.get("sha") | ||||
|  | ||||
|  | ||||
| def sha_from_force_push_after(ev: dict[str, Any]) -> Optional[str]: | ||||
|     """Extract SHA from force push event in timeline""" | ||||
|     # The current GitHub API format | ||||
|     commit_id = ev.get("commit_id") | ||||
|     if commit_id: | ||||
|         return str(commit_id) | ||||
|  | ||||
|     # Legacy format | ||||
|     after = ev.get("after") or ev.get("after_commit") or {} | ||||
|     if isinstance(after, dict): | ||||
|         return after.get("sha") or after.get("oid") | ||||
|     return ev.get("after_sha") or ev.get("head_sha") | ||||
|  | ||||
|  | ||||
| def gh_get_pr_info(org: str, proj: str, pr_no: int) -> Any: | ||||
|     rc = gh_graphql(GH_GET_PR_INFO_QUERY, name=proj, owner=org, number=pr_no) | ||||
|     return rc["data"]["repository"]["pullRequest"] | ||||
| @ -794,24 +737,16 @@ class GitHubPR: | ||||
|     def last_commit(self) -> Any: | ||||
|         return self.info["commits"]["nodes"][-1]["commit"] | ||||
|  | ||||
|     def last_commit_sha(self, default: Optional[str] = None) -> str: | ||||
|         # for commits, the oid is the sha | ||||
|  | ||||
|         if default is None: | ||||
|             return str(self.last_commit()["oid"]) | ||||
|  | ||||
|         return str(self.last_commit().get("oid", default)) | ||||
|  | ||||
|     def get_merge_base(self) -> str: | ||||
|         if self.merge_base: | ||||
|             return self.merge_base | ||||
|  | ||||
|         last_commit_sha = self.last_commit_sha() | ||||
|         last_commit_oid = self.last_commit()["oid"] | ||||
|         # NB: We could use self.base_ref() here for regular PR, however, that doesn't | ||||
|         # work for ghstack where the base is the custom branch, i.e. gh/USER/ID/base, | ||||
|         # so let's just use main instead | ||||
|         self.merge_base = gh_fetch_merge_base( | ||||
|             self.org, self.project, last_commit_sha, self.default_branch() | ||||
|             self.org, self.project, last_commit_oid, self.default_branch() | ||||
|         ) | ||||
|  | ||||
|         # Fallback to baseRefOid if the API call fails, i.e. rate limit. Note that baseRefOid | ||||
| @ -900,44 +835,6 @@ class GitHubPR: | ||||
|     def get_commit_count(self) -> int: | ||||
|         return int(self.info["commits_with_authors"]["totalCount"]) | ||||
|  | ||||
|     def get_commit_sha_at_comment(self, comment_id: int) -> Optional[str]: | ||||
|         """ | ||||
|         Get the PR head commit SHA that was present when a specific comment was posted. | ||||
|         This ensures we only merge the state of the PR at the time the merge command was issued, | ||||
|         not any subsequent commits that may have been pushed after. | ||||
|  | ||||
|         Returns None if no head-changing events found before the comment or if the comment was not found. | ||||
|         """ | ||||
|         head = None | ||||
|  | ||||
|         try: | ||||
|             for event in iter_issue_timeline_until_comment( | ||||
|                 self.org, self.project, self.pr_num, comment_id | ||||
|             ): | ||||
|                 etype = event.get("event") | ||||
|                 if etype == "committed": | ||||
|                     sha = sha_from_committed_event(event) | ||||
|                     if sha: | ||||
|                         head = sha | ||||
|                         print(f"Timeline: Found commit event for SHA {sha}") | ||||
|                 elif etype == "head_ref_force_pushed": | ||||
|                     sha = sha_from_force_push_after(event) | ||||
|                     if sha: | ||||
|                         head = sha | ||||
|                         print(f"Timeline: Found force push event for SHA {sha}") | ||||
|                 elif etype == "commented": | ||||
|                     if event.get("id") == comment_id: | ||||
|                         print(f"Timeline: Found final comment with sha {sha}") | ||||
|                         return head | ||||
|         except Exception as e: | ||||
|             print( | ||||
|                 f"Warning: Failed to reconstruct timeline for comment {comment_id}: {e}" | ||||
|             ) | ||||
|             return None | ||||
|  | ||||
|         print(f"Did not find comment with id {comment_id} in the PR timeline") | ||||
|         return None | ||||
|  | ||||
|     def get_pr_creator_login(self) -> str: | ||||
|         return cast(str, self.info["author"]["login"]) | ||||
|  | ||||
| @ -1254,7 +1151,7 @@ class GitHubPR: | ||||
|         *, | ||||
|         skip_mandatory_checks: bool = False, | ||||
|         dry_run: bool = False, | ||||
|         comment_id: int, | ||||
|         comment_id: Optional[int] = None, | ||||
|         ignore_current_checks: Optional[list[str]] = None, | ||||
|     ) -> None: | ||||
|         # Raises exception if matching rule is not found | ||||
| @ -1270,7 +1167,7 @@ class GitHubPR: | ||||
|             skip_internal_checks=can_skip_internal_checks(self, comment_id), | ||||
|             ignore_current_checks=ignore_current_checks, | ||||
|         ) | ||||
|         additional_merged_prs = self.merge_changes_locally( | ||||
|         additional_merged_prs = self.merge_changes( | ||||
|             repo, skip_mandatory_checks, comment_id | ||||
|         ) | ||||
|  | ||||
| @ -1299,7 +1196,7 @@ class GitHubPR: | ||||
|                 broken_trunk_checks=ignorable_checks.get("BROKEN_TRUNK", []), | ||||
|                 flaky_checks=ignorable_checks.get("FLAKY", []), | ||||
|                 unstable_checks=ignorable_checks.get("UNSTABLE", []), | ||||
|                 last_commit_sha=self.last_commit_sha(default=""), | ||||
|                 last_commit_sha=self.last_commit().get("oid", ""), | ||||
|                 merge_base_sha=self.get_merge_base(), | ||||
|                 merge_commit_sha=merge_commit_sha, | ||||
|                 is_failed=False, | ||||
| @ -1320,7 +1217,7 @@ class GitHubPR: | ||||
|             dry_run=dry_run, | ||||
|         ) | ||||
|  | ||||
|     def merge_changes_locally( | ||||
|     def merge_changes( | ||||
|         self, | ||||
|         repo: GitRepo, | ||||
|         skip_mandatory_checks: bool = False, | ||||
| @ -1329,15 +1226,27 @@ class GitHubPR: | ||||
|         skip_all_rule_checks: bool = False, | ||||
|     ) -> list["GitHubPR"]: | ||||
|         """ | ||||
|         :param skip_all_rule_checks: If true, skips all rule checks on ghstack PRs, useful for dry-running merge locally | ||||
|         :param skip_all_rule_checks: If true, skips all rule checks, useful for dry-running merge locally | ||||
|         """ | ||||
|         branch_to_merge_into = self.default_branch() if branch is None else branch | ||||
|         if repo.current_branch() != branch_to_merge_into: | ||||
|             repo.checkout(branch_to_merge_into) | ||||
|         if not self.is_ghstack_pr(): | ||||
|             msg = self.gen_commit_message() | ||||
|             pr_branch_name = f"__pull-request-{self.pr_num}__init__" | ||||
|             repo.fetch(self.last_commit()["oid"], pr_branch_name) | ||||
|             repo._run_git("merge", "--squash", pr_branch_name) | ||||
|             repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg) | ||||
|  | ||||
|         # It's okay to skip the commit SHA check for ghstack PRs since | ||||
|         # authoring requires write access to the repo. | ||||
|         if self.is_ghstack_pr(): | ||||
|             # Did the PR change since we started the merge? | ||||
|             pulled_sha = repo.show_ref(pr_branch_name) | ||||
|             latest_pr_status = GitHubPR(self.org, self.project, self.pr_num) | ||||
|             if pulled_sha != latest_pr_status.last_commit()["oid"]: | ||||
|                 raise RuntimeError( | ||||
|                     "PR has been updated since CI checks last passed. Please rerun the merge command." | ||||
|                 ) | ||||
|             return [] | ||||
|         else: | ||||
|             return self.merge_ghstack_into( | ||||
|                 repo, | ||||
|                 skip_mandatory_checks, | ||||
| @ -1345,48 +1254,6 @@ class GitHubPR: | ||||
|                 skip_all_rule_checks=skip_all_rule_checks, | ||||
|             ) | ||||
|  | ||||
|         msg = self.gen_commit_message() | ||||
|         pr_branch_name = f"__pull-request-{self.pr_num}__init__" | ||||
|  | ||||
|         # Determine which commit SHA to merge | ||||
|         commit_to_merge = None | ||||
|         if not comment_id: | ||||
|             raise ValueError("Must provide --comment-id when merging regular PRs") | ||||
|  | ||||
|         # Get the commit SHA that was present when the comment was made | ||||
|         commit_to_merge = self.get_commit_sha_at_comment(comment_id) | ||||
|         if not commit_to_merge: | ||||
|             raise RuntimeError( | ||||
|                 f"Could not find commit that was pushed before comment {comment_id}" | ||||
|             ) | ||||
|  | ||||
|         # Validate that this commit is the latest commit on the PR | ||||
|         latest_commit = self.last_commit_sha() | ||||
|         if commit_to_merge != latest_commit: | ||||
|             raise RuntimeError( | ||||
|                 f"Commit {commit_to_merge} was HEAD when comment {comment_id} was posted " | ||||
|                 f"but now the latest commit on the PR is {latest_commit}. " | ||||
|                 f"Please re-issue the merge command to merge the latest commit." | ||||
|             ) | ||||
|  | ||||
|         print(f"Merging commit {commit_to_merge} locally") | ||||
|  | ||||
|         repo.fetch(commit_to_merge, pr_branch_name) | ||||
|         repo._run_git("merge", "--squash", pr_branch_name) | ||||
|         repo._run_git("commit", f'--author="{self.get_author()}"', "-m", msg) | ||||
|  | ||||
|         # Did the PR change since we started the merge? | ||||
|         pulled_sha = repo.show_ref(pr_branch_name) | ||||
|         latest_pr_status = GitHubPR(self.org, self.project, self.pr_num) | ||||
|         if ( | ||||
|             pulled_sha != latest_pr_status.last_commit_sha() | ||||
|             or pulled_sha != commit_to_merge | ||||
|         ): | ||||
|             raise RuntimeError( | ||||
|                 "PR has been updated since CI checks last passed. Please rerun the merge command." | ||||
|             ) | ||||
|         return [] | ||||
|  | ||||
|  | ||||
| class MergeRuleFailedError(RuntimeError): | ||||
|     def __init__(self, message: str, rule: Optional["MergeRule"] = None) -> None: | ||||
| @ -1591,7 +1458,7 @@ def find_matching_merge_rule( | ||||
|             pending_checks = [] | ||||
|             failed_checks = [] | ||||
|  | ||||
|         hud_link = f"https://hud.pytorch.org/{pr.org}/{pr.project}/commit/{pr.last_commit_sha()}" | ||||
|         hud_link = f"https://hud.pytorch.org/{pr.org}/{pr.project}/commit/{pr.last_commit()['oid']}" | ||||
|         if len(failed_checks) > 0: | ||||
|             if reject_reason_score < 30000: | ||||
|                 reject_reason_score = 30000 | ||||
| @ -2289,14 +2156,14 @@ def categorize_checks( | ||||
| def merge( | ||||
|     pr: GitHubPR, | ||||
|     repo: GitRepo, | ||||
|     comment_id: int, | ||||
|     dry_run: bool = False, | ||||
|     skip_mandatory_checks: bool = False, | ||||
|     comment_id: Optional[int] = None, | ||||
|     timeout_minutes: int = 400, | ||||
|     stale_pr_days: int = 3, | ||||
|     ignore_current: bool = False, | ||||
| ) -> None: | ||||
|     initial_commit_sha = pr.last_commit_sha() | ||||
|     initial_commit_sha = pr.last_commit()["oid"] | ||||
|     pr_link = f"https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num}" | ||||
|     print(f"Attempting merge of {initial_commit_sha} ({pr_link})") | ||||
|  | ||||
| @ -2367,7 +2234,7 @@ def merge( | ||||
|             f"Attempting merge of https://github.com/{pr.org}/{pr.project}/pull/{pr.pr_num} ({elapsed_time / 60} minutes elapsed)" | ||||
|         ) | ||||
|         pr = GitHubPR(pr.org, pr.project, pr.pr_num) | ||||
|         if initial_commit_sha != pr.last_commit_sha(): | ||||
|         if initial_commit_sha != pr.last_commit()["oid"]: | ||||
|             raise RuntimeError( | ||||
|                 "New commits were pushed while merging. Please rerun the merge command." | ||||
|             ) | ||||
| @ -2534,7 +2401,7 @@ def main() -> None: | ||||
|     if args.check_mergeability: | ||||
|         if pr.is_ghstack_pr(): | ||||
|             get_ghstack_prs(repo, pr)  # raises error if out of sync | ||||
|         pr.merge_changes_locally( | ||||
|         pr.merge_changes( | ||||
|             repo, | ||||
|             skip_mandatory_checks=True, | ||||
|             skip_all_rule_checks=True, | ||||
| @ -2549,18 +2416,12 @@ def main() -> None: | ||||
|         gh_post_pr_comment(org, project, args.pr_num, message, dry_run=args.dry_run) | ||||
|         return | ||||
|     try: | ||||
|         # Ensure comment id is set, else fail | ||||
|         if not args.comment_id: | ||||
|             raise ValueError( | ||||
|                 "Comment ID is required for merging PRs, please provide it using --comment-id" | ||||
|             ) | ||||
|  | ||||
|         merge( | ||||
|             pr, | ||||
|             repo, | ||||
|             comment_id=args.comment_id, | ||||
|             dry_run=args.dry_run, | ||||
|             skip_mandatory_checks=args.force, | ||||
|             comment_id=args.comment_id, | ||||
|             ignore_current=args.ignore_current, | ||||
|         ) | ||||
|     except Exception as e: | ||||
| @ -2582,7 +2443,7 @@ def main() -> None: | ||||
|                 broken_trunk_checks=[], | ||||
|                 flaky_checks=[], | ||||
|                 unstable_checks=[], | ||||
|                 last_commit_sha=pr.last_commit_sha(default=""), | ||||
|                 last_commit_sha=pr.last_commit().get("oid", ""), | ||||
|                 merge_base_sha=pr.get_merge_base(), | ||||
|                 is_failed=True, | ||||
|                 skip_mandatory_checks=args.force, | ||||
|  | ||||
							
								
								
									
										3
									
								
								.github/scripts/windows/build_magma.bat
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.github/scripts/windows/build_magma.bat
									
									
									
									
										vendored
									
									
								
							| @ -35,9 +35,6 @@ cd magma | ||||
| mkdir build && cd build | ||||
|  | ||||
| set GPU_TARGET=All | ||||
| if "%CUVER_NODOT%" == "130" ( | ||||
|   set CUDA_ARCH_LIST=-gencode=arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120 | ||||
| ) | ||||
| if "%CUVER_NODOT%" == "129" ( | ||||
|   set CUDA_ARCH_LIST=-gencode=arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90 -gencode arch=compute_100,code=sm_100 -gencode arch=compute_120,code=sm_120 | ||||
| ) | ||||
|  | ||||
							
								
								
									
										16
									
								
								.github/scripts/windows/build_triton.bat
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/scripts/windows/build_triton.bat
									
									
									
									
										vendored
									
									
								
							| @ -1,12 +1,18 @@ | ||||
| @echo on | ||||
|  | ||||
| set DESIRED_PYTHON=%PY_VERS% | ||||
| call .ci/pytorch/windows/internal/install_python.bat | ||||
|  | ||||
| set PYTHON_PREFIX=%PY_VERS:.=% | ||||
| set PYTHON_PREFIX=py%PYTHON_PREFIX:;=;py% | ||||
| call .ci/pytorch/win-test-helpers/installation-helpers/activate_miniconda3.bat | ||||
| :: Create a new conda environment | ||||
| if "%PY_VERS%" == "3.13t" ( | ||||
|     call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python-freethreading python=3.13 | ||||
| ) else ( | ||||
|     call conda create -n %PYTHON_PREFIX% -y -c=conda-forge python=%PY_VERS% | ||||
| ) | ||||
| :: Fix cmake version for issue https://github.com/pytorch/pytorch/issues/150480 | ||||
| %PYTHON_EXEC% -m pip install wheel pybind11 certifi cython cmake==3.31.6 setuptools==72.1.0 ninja==1.11.1.4 | ||||
| call conda run -n %PYTHON_PREFIX% pip install wheel pybind11 certifi cython cmake==3.31.6 setuptools==72.1.0 ninja | ||||
|  | ||||
| dir "%VC_INSTALL_PATH%" | ||||
|  | ||||
| call "%VC_INSTALL_PATH%\VC\Auxiliary\Build\vcvarsall.bat" x64 | ||||
| %PYTHON_EXEC% .github/scripts/build_triton_wheel.py --device=%BUILD_DEVICE% %RELEASE% | ||||
| call conda run -n %PYTHON_PREFIX% python .github/scripts/build_triton_wheel.py --device=%BUILD_DEVICE% %RELEASE% | ||||
|  | ||||
							
								
								
									
										2
									
								
								.github/templates/common.yml.j2
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/templates/common.yml.j2
									
									
									
									
										vendored
									
									
								
							| @ -4,7 +4,7 @@ | ||||
| {%- set download_artifact_action = "actions/download-artifact@v4.1.7" -%} | ||||
|  | ||||
| {%- set timeout_minutes = 240 -%} | ||||
| {%- set timeout_minutes_windows_binary = 360 -%} | ||||
| {%- set timeout_minutes_windows_binary = 300 -%} | ||||
|  | ||||
| {%- macro concurrency(build_environment) -%} | ||||
| concurrency: | ||||
|  | ||||
| @ -114,12 +114,12 @@ jobs: | ||||
|       ALPINE_IMAGE: "docker.io/s390x/alpine" | ||||
|       {%- elif config["gpu_arch_type"] == "rocm" %} | ||||
|       runs_on: linux.rocm.gpu | ||||
|       {%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] in ["12.6"] %} | ||||
|       {%- elif config["gpu_arch_type"] == "cuda" and config["gpu_arch_version"] in ["12.8", "12.9"] %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.4xlarge.nvidia.gpu  # 12.6 build can use maxwell (sm_50) runner | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu  # 12.8 and 12.9 build need sm_70+ runner | ||||
|       {%- elif config["gpu_arch_type"] == "cuda" %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.g4dn.4xlarge.nvidia.gpu # 12.8+ builds need sm_70+ runner | ||||
|       runs_on: linux.4xlarge.nvidia.gpu # for other cuda versions, we use 4xlarge runner | ||||
|       {%- else %} | ||||
|       runner_prefix: "${{ needs.get-label-type.outputs.label-type }}" | ||||
|       runs_on: linux.4xlarge | ||||
| @ -135,7 +135,7 @@ jobs: | ||||
|       contents: read | ||||
|     steps: | ||||
|       - name: Setup XPU | ||||
|         uses: pytorch/pytorch/.github/actions/setup-xpu@main | ||||
|         uses: ./.github/actions/setup-xpu | ||||
|       - name: configure aws credentials | ||||
|         id: aws_creds | ||||
|         uses: aws-actions/configure-aws-credentials@v4 | ||||
| @ -171,7 +171,7 @@ jobs: | ||||
|       - name: Teardown XPU | ||||
|         uses: ./.github/actions/teardown-xpu | ||||
|     {%- else %} | ||||
|     runs-on: linux.rocm.gpu.gfx942.1 | ||||
|     runs-on: linux.rocm.gpu.mi250 | ||||
|     timeout-minutes: !{{ common.timeout_minutes }} | ||||
|     !{{ upload.binary_env(config) }} | ||||
|     steps: | ||||
|  | ||||
| @ -110,33 +110,12 @@ jobs: | ||||
|           # Create new "clean" conda environment for testing | ||||
|  | ||||
|           SMOKE_TEST_PARAMS="" | ||||
|  | ||||
|           EXTRA_CONDA_INSTALL_FLAGS="" | ||||
|           CONDA_ENV_CREATE_FLAGS="" | ||||
|           # shellcheck disable=SC2153 | ||||
|           case $DESIRED_PYTHON in | ||||
|             3.14t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.14) | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge/label/python_rc -c conda-forge" | ||||
|               desired_python="3.14.0rc1" | ||||
|               ;; | ||||
|             3.13t) | ||||
|               CONDA_ENV_CREATE_FLAGS="python-freethreading" | ||||
|               EXTRA_CONDA_INSTALL_FLAGS="-c conda-forge" | ||||
|               desired_python="3.13" | ||||
|               ;; | ||||
|             *) | ||||
|               # shellcheck disable=SC2153 | ||||
|               desired_python=${DESIRED_PYTHON} | ||||
|               ;; | ||||
|           esac | ||||
|  | ||||
|           # shellcheck disable=SC2086 | ||||
|           conda create -yn "test_conda_env" python="$desired_python" ${CONDA_ENV_CREATE_FLAGS} ${EXTRA_CONDA_INSTALL_FLAGS} | ||||
|           if [[ $DESIRED_PYTHON == "3.13t" ]]; then | ||||
|             conda create -yn "test_conda_env" python="3.13" python-freethreading -c conda-forge | ||||
|             SMOKE_TEST_PARAMS="--torch-compile-check disabled" | ||||
|           else | ||||
|             conda create -yn "test_conda_env" python="$DESIRED_PYTHON" | ||||
|           fi | ||||
|           conda activate test_conda_env | ||||
|           pip install "$PYTORCH_FINAL_PACKAGE_DIR"/*.whl numpy -v | ||||
|  | ||||
|  | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user
	