Link LazyLinalg with cusolver statically when needed (#79324) (#79522)

By copy-n-pasting the static linking logic from `libtorch_cuda` if lazylinalg is not enabled.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/79324
Approved by: https://github.com/atalman

Co-authored-by: Nikita Shulga <nshulga@fb.com>
Add docs for Python Registration (#79481)
2025-10-23 23:04:52 +08:00 · 2022-06-14 08:35:57 -07:00 · 2022-06-14 08:09:21 -04:00 · 2022-06-14 08:06:07 -04:00 · 2022-06-13 18:52:31 -04:00 · 2022-06-13 17:15:34 -04:00
22781 changed files with 1261369 additions and 2419616 deletions
--- a/.bazelignore
+++ b/.bazelignore
@@ -1,4 +0,0 @@
-# We do not use this library in our Bazel build. It contains an
-# infinitely recursing symlink that makes Bazel very unhappy.
-third_party/ittapi/
-third_party/opentelemetry-cpp
--- a/.bazelrc
+++ b/.bazelrc
@@ -1,8 +1,8 @@
-build --cxxopt=--std=c++17
+build --cxxopt=--std=c++14
 build --copt=-I.
 # Bazel does not support including its cc_library targets as system
 # headers. We work around this for generated code
-# (e.g. torch/headeronly/macros/cmake_macros.h) by making the generated directory a
+# (e.g. c10/macros/cmake_macros.h) by making the generated directory a
 # system include path.
 build --copt=-isystem --copt bazel-out/k8-fastbuild/bin
 build --copt=-isystem --copt bazel-out/darwin-fastbuild/bin
@@ -13,102 +13,15 @@ build:no-tty --curses no
 build:no-tty --progress_report_interval 10
 build:no-tty --show_progress_rate_limit 10

-# Build with GPU support by default.
-build --define=cuda=true
-# rules_cuda configuration
-build --@rules_cuda//cuda:enable_cuda
-build --@rules_cuda//cuda:cuda_targets=sm_52
-build --@rules_cuda//cuda:compiler=nvcc
-build --repo_env=CUDA_PATH=/usr/local/cuda
-
-# Configuration to build without GPU support
-build:cpu-only --define=cuda=false
+# Configuration to build with GPU support
+build:gpu --define=cuda=true
 # define a separate build folder for faster switching between configs
-build:cpu-only --platform_suffix=-cpu-only
+build:gpu --platform_suffix=-gpu
 # See the note on the config-less build for details about why we are
-# doing this. We must also do it for the "-cpu-only" platform suffix.
-build --copt=-isystem --copt=bazel-out/k8-fastbuild-cpu-only/bin
+# doing this. We must also do it for the "-gpu" platform suffix.
+build --copt=-isystem --copt=bazel-out/k8-fastbuild-gpu/bin
 # rules_cuda configuration
-build:cpu-only --@rules_cuda//cuda:enable_cuda=False
-
-# Definition of --config=shell
-# interactive shell immediately before execution
-build:shell --run_under="//tools/bazel_tools:shellwrap"
-
-# Disable all warnings for external repositories. We don't care about
-# their warnings.
-build --per_file_copt=^external/@-w
-
-# Set additional warnings to error level.
-#
-# Implementation notes:
-#  * we use file extensions to determine if we are using the C++
-#    compiler or the cuda compiler
-#  * we use ^// at the start of the regex to only permit matching
-#    PyTorch files. This excludes external repos.
-#
-# Note that because this is logically a command-line flag, it is
-# considered the word on what warnings are enabled. This has the
-# unfortunate consequence of preventing us from disabling an error at
-# the target level because those flags will come before these flags in
-# the action invocation. Instead we provide per-file exceptions after
-# this.
-#
-# On the bright side, this means we don't have to more broadly apply
-# the exceptions to an entire target.
-#
-# Looking for CUDA flags? We have a cu_library macro that we can edit
-# directly. Look in //tools/rules:cu.bzl for details. Editing the
-# macro over this has the following advantages:
-#  * making changes does not require discarding the Bazel analysis
-#    cache
-#  * it allows for selective overrides on individual targets since the
-#    macro-level opts will come earlier than target level overrides
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=all
-# The following warnings come from -Wall. We downgrade them from error
-# to warnings here.
-#
-# We intentionally use #pragma unroll, which is compiler specific.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-error=unknown-pragmas
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Werror=extra
-# The following warnings come from -Wextra. We downgrade them from error
-# to warnings here.
-#
-# unused-parameter-compare has a tremendous amount of violations in the
-# codebase. It will be a lot of work to fix them, just disable it for
-# now.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-parameter
-# missing-field-parameters has both a large number of violations in
-# the codebase, but it also is used pervasively in the Python C
-# API. There are a couple of catches though:
-# * we use multiple versions of the Python API and hence have
-#   potentially multiple different versions of each relevant
-#   struct. They may have different numbers of fields. It will be
-#   unwieldy to support multiple versions in the same source file.
-# * Python itself for many of these structs recommends only
-#   initializing a subset of the fields. We should respect the API
-#   usage conventions of our dependencies.
-#
-# Hence, we just disable this warning altogether. We may want to clean
-# up some of the clear-cut cases that could be risky, but we still
-# likely want to have this disabled for the most part.
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-missing-field-initializers
-
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-function
-build --per_file_copt='^//.*\.(cpp|cc)$'@-Wno-unused-variable
-
-build --per_file_copt='//:aten/src/ATen/RegisterCompositeExplicitAutograd\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterCompositeImplicitAutograd\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterMkldnnCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterQuantizedCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseCsrCPU\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterNestedTensorMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterSparseMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterQuantizedMeta\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:aten/src/ATen/RegisterZeroTensor\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:torch/csrc/lazy/generated/RegisterAutogradLazy\.cpp$'@-Wno-error=unused-function
-build --per_file_copt='//:torch/csrc/lazy/generated/RegisterLazy\.cpp$'@-Wno-error=unused-function
+build:gpu --@rules_cuda//cuda:enable_cuda
+build:gpu --@rules_cuda//cuda:cuda_targets=sm_52
+build:gpu --@rules_cuda//cuda:compiler=nvcc
+build:gpu --repo_env=CUDA_PATH=/usr/local/cuda
--- a/.bazelversion
+++ b/.bazelversion
@@ -1 +1 @@
-6.5.0
+4.2.1
--- a/.bc-linter.yml
+++ b/.bc-linter.yml
@@ -1,15 +0,0 @@
-version: 1
-paths:
-include:
-  - "**/*.py"
-exclude:
-  - ".*"
-  - ".*/**"
-  - "**/.*/**"
-  - "**/.*"
-  - "**/_*/**"
-  - "**/_*.py"
-  - "**/test/**"
-  - "**/benchmarks/**"
-  - "**/test_*.py"
-  - "**/*_test.py"
--- a/.buckconfig.oss
+++ b/.buckconfig.oss
@@ -0,0 +1,15 @@
+[buildfile]
+name = BUILD.buck
+
+[repositories]
+  bazel_skylib = third_party/bazel-skylib/
+
+[download]
+  in_build = true
+
+[cxx]
+  cxxflags = -std=c++17
+  should_remap_host_platform = true
+
+[project]
+  default_flavors_mode=all
--- a/.ci/aarch64_linux/README.md
+++ b/.ci/aarch64_linux/README.md
@@ -1,19 +0,0 @@
-# Aarch64 (ARM/Graviton) Support Scripts
-Scripts for building aarch64 PyTorch PIP Wheels. These scripts build the following wheels:
-* torch
-* torchvision
-* torchaudio
-* torchtext
-* torchdata
-## Aarch64_ci_build.sh
-This script is design to support CD operations within PyPi manylinux aarch64 container, and be executed in the container. It prepares the container and then executes __aarch64_wheel_ci_build.py__ to build the wheels. The script "assumes" the PyTorch repo is located at: ```/pytorch``` and will put the wheels into ```/artifacts```.
-### Usage
-```DESIRED_PYTHON=<PythonVersion> aarch64_ci_build.sh```
-
-__NOTE:__ CI build is currently __EXPERMINTAL__
-
-## Build_aarch64_wheel.py
-This app allows a person to build using AWS EC3 resources and requires AWS-CLI and Boto3 with AWS credentials to support building EC2 instances for the wheel builds. Can be used in a codebuild CD or from a local system.
-
-### Usage
-```build_aarch64_wheel.py --key-name <YourPemKey> --use-docker --python 3.8 --branch <RCtag>```
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@@ -1,49 +0,0 @@
-#!/bin/bash
-set -eux -o pipefail
-
-GPU_ARCH_VERSION=${GPU_ARCH_VERSION:-}
-
-# Set CUDA architecture lists to match x86 build_cuda.sh
-if [[ "$GPU_ARCH_VERSION" == *"12.6"* ]]; then
-    export TORCH_CUDA_ARCH_LIST="8.0;9.0"
-elif [[ "$GPU_ARCH_VERSION" == *"12.8"* ]]; then
-    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;12.0"
-elif [[ "$GPU_ARCH_VERSION" == *"13.0"* ]]; then
-    export TORCH_CUDA_ARCH_LIST="8.0;9.0;10.0;11.0;12.0+PTX"
-fi
-
-# Compress the fatbin with -compress-mode=size for CUDA 13
-if [[ "$DESIRED_CUDA" == *"13"* ]]; then
-    export TORCH_NVCC_FLAGS="-compress-mode=size"
-fi
-
-SCRIPTPATH="$( cd -- "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )"
-source $SCRIPTPATH/aarch64_ci_setup.sh
-
-###############################################################################
-# Run aarch64 builder python
-###############################################################################
-cd /
-# adding safe directory for git as the permissions will be
-# on the mounted pytorch repo
-git config --global --add safe.directory /pytorch
-pip install -r /pytorch/requirements.txt
-pip install auditwheel==6.2.0 wheel
-if [ "$DESIRED_CUDA" = "cpu" ]; then
-    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
-    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
-else
-    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
-    export USE_SYSTEM_NCCL=1
-
-    # Check if we should use NVIDIA libs from PyPI (similar to x86 build_cuda.sh logic)
-    if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
-        echo "Bundling CUDA libraries with wheel for aarch64."
-    else
-        echo "Using nvidia libs from pypi for aarch64."
-        echo "Updated PYTORCH_EXTRA_INSTALL_REQUIREMENTS for aarch64: $PYTORCH_EXTRA_INSTALL_REQUIREMENTS"
-        export USE_NVIDIA_PYPI_LIBS=1
-    fi
-
-    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
-fi
--- a/.ci/aarch64_linux/aarch64_ci_setup.sh
+++ b/.ci/aarch64_linux/aarch64_ci_setup.sh
@@ -1,21 +0,0 @@
-#!/bin/bash
-set -eux -o pipefail
-
-# This script is used to prepare the Docker container for aarch64_ci_wheel_build.py python script
-# By creating symlinks from desired /opt/python to /usr/local/bin/
-
-NUMPY_VERSION=2.0.2
-if [[ "$DESIRED_PYTHON"  == "3.13" || "$DESIRED_PYTHON" == "3.13t" ]]; then
-    NUMPY_VERSION=2.1.2
-fi
-
-SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-source $SCRIPTPATH/../manywheel/set_desired_python.sh
-
-pip install -q numpy==${NUMPY_VERSION} pyyaml==6.0.2 scons==4.7.0 ninja==1.11.1 patchelf==0.17.2
-
-for tool in python python3 pip pip3 ninja scons patchelf; do
-    ln -sf ${DESIRED_PYTHON_BIN_DIR}/${tool} /usr/local/bin;
-done
-
-python --version
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@@ -1,382 +0,0 @@
-#!/usr/bin/env python3
-# encoding: UTF-8
-
-import os
-import shutil
-from subprocess import check_call, check_output
-
-
-def list_dir(path: str) -> list[str]:
-    """'
-    Helper for getting paths for Python
-    """
-    return check_output(["ls", "-1", path]).decode().split("\n")
-
-
-def build_ArmComputeLibrary() -> None:
-    """
-    Using ArmComputeLibrary for aarch64 PyTorch
-    """
-    print("Building Arm Compute Library")
-    acl_build_flags = [
-        "debug=0",
-        "neon=1",
-        "opencl=0",
-        "os=linux",
-        "openmp=1",
-        "cppthreads=0",
-        "arch=armv8a",
-        "multi_isa=1",
-        "fixed_format_kernels=1",
-        "build=native",
-    ]
-    acl_install_dir = "/acl"
-    acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary")
-    if os.path.isdir(acl_install_dir):
-        shutil.rmtree(acl_install_dir)
-    if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)):
-        check_call(
-            [
-                "git",
-                "clone",
-                "https://github.com/ARM-software/ComputeLibrary.git",
-                "-b",
-                "v25.02",
-                "--depth",
-                "1",
-                "--shallow-submodules",
-            ]
-        )
-
-    check_call(
-        ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags,
-        cwd=acl_checkout_dir,
-    )
-    for d in ["arm_compute", "include", "utils", "support", "src", "build"]:
-        shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")
-
-
-def replace_tag(filename) -> None:
-    with open(filename) as f:
-        lines = f.readlines()
-    for i, line in enumerate(lines):
-        if line.startswith("Tag:"):
-            lines[i] = line.replace("-linux_", "-manylinux_2_28_")
-            print(f"Updated tag from {line} to {lines[i]}")
-            break
-
-    with open(filename, "w") as f:
-        f.writelines(lines)
-
-
-def patch_library_rpath(
-    folder: str,
-    lib_name: str,
-    use_nvidia_pypi_libs: bool = False,
-    desired_cuda: str = "",
-) -> None:
-    """Apply patchelf to set RPATH for a library in torch/lib"""
-    lib_path = f"{folder}/tmp/torch/lib/{lib_name}"
-
-    if use_nvidia_pypi_libs:
-        # For PyPI NVIDIA libraries, construct CUDA RPATH
-        cuda_rpaths = [
-            "$ORIGIN/../../nvidia/cudnn/lib",
-            "$ORIGIN/../../nvidia/nvshmem/lib",
-            "$ORIGIN/../../nvidia/nccl/lib",
-            "$ORIGIN/../../nvidia/cusparselt/lib",
-        ]
-
-        if "130" in desired_cuda:
-            cuda_rpaths.append("$ORIGIN/../../nvidia/cu13/lib")
-        else:
-            cuda_rpaths.extend(
-                [
-                    "$ORIGIN/../../nvidia/cublas/lib",
-                    "$ORIGIN/../../nvidia/cuda_cupti/lib",
-                    "$ORIGIN/../../nvidia/cuda_nvrtc/lib",
-                    "$ORIGIN/../../nvidia/cuda_runtime/lib",
-                    "$ORIGIN/../../nvidia/cufft/lib",
-                    "$ORIGIN/../../nvidia/curand/lib",
-                    "$ORIGIN/../../nvidia/cusolver/lib",
-                    "$ORIGIN/../../nvidia/cusparse/lib",
-                    "$ORIGIN/../../nvidia/nvtx/lib",
-                    "$ORIGIN/../../nvidia/cufile/lib",
-                ]
-            )
-
-        # Add $ORIGIN for local torch libs
-        rpath = ":".join(cuda_rpaths) + ":$ORIGIN"
-    else:
-        # For bundled libraries, just use $ORIGIN
-        rpath = "$ORIGIN"
-
-    if os.path.exists(lib_path):
-        os.system(
-            f"cd {folder}/tmp/torch/lib/; "
-            f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}"
-        )
-
-
-def copy_and_patch_library(
-    src_path: str,
-    folder: str,
-    use_nvidia_pypi_libs: bool = False,
-    desired_cuda: str = "",
-) -> None:
-    """Copy a library to torch/lib and patch its RPATH"""
-    if os.path.exists(src_path):
-        lib_name = os.path.basename(src_path)
-        shutil.copy2(src_path, f"{folder}/tmp/torch/lib/{lib_name}")
-        patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
-
-
-def package_cuda_wheel(wheel_path, desired_cuda) -> None:
-    """
-    Package the cuda wheel libraries
-    """
-    folder = os.path.dirname(wheel_path)
-    os.mkdir(f"{folder}/tmp")
-    os.system(f"unzip {wheel_path} -d {folder}/tmp")
-    # Delete original wheel since it will be repackaged
-    os.system(f"rm {wheel_path}")
-
-    # Check if we should use PyPI NVIDIA libraries or bundle system libraries
-    use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
-
-    if use_nvidia_pypi_libs:
-        print("Using nvidia libs from pypi - skipping CUDA library bundling")
-        # For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages
-        # We only need to bundle non-NVIDIA libraries
-        minimal_libs_to_copy = [
-            "/lib64/libgomp.so.1",
-            "/usr/lib64/libgfortran.so.5",
-            "/acl/build/libarm_compute.so",
-            "/acl/build/libarm_compute_graph.so",
-            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
-            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
-            "/usr/local/lib/libnvpl_lapack_core.so.0",
-            "/usr/local/lib/libnvpl_blas_core.so.0",
-        ]
-
-        # Copy minimal libraries to unzipped_folder/torch/lib
-        for lib_path in minimal_libs_to_copy:
-            copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
-
-        # Patch torch libraries used for searching libraries
-        torch_libs_to_patch = [
-            "libtorch.so",
-            "libtorch_cpu.so",
-            "libtorch_cuda.so",
-            "libtorch_cuda_linalg.so",
-            "libtorch_global_deps.so",
-            "libtorch_python.so",
-            "libtorch_nvshmem.so",
-            "libc10.so",
-            "libc10_cuda.so",
-            "libcaffe2_nvrtc.so",
-            "libshm.so",
-        ]
-        for lib_name in torch_libs_to_patch:
-            patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
-    else:
-        print("Bundling CUDA libraries with wheel")
-        # Original logic for bundling system CUDA libraries
-        # Common libraries for all CUDA versions
-        common_libs = [
-            # Non-NVIDIA system libraries
-            "/lib64/libgomp.so.1",
-            "/usr/lib64/libgfortran.so.5",
-            "/acl/build/libarm_compute.so",
-            "/acl/build/libarm_compute_graph.so",
-            # Common CUDA libraries (same for all versions)
-            "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0",
-            "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0",
-            "/usr/local/lib/libnvpl_lapack_core.so.0",
-            "/usr/local/lib/libnvpl_blas_core.so.0",
-            "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so",
-            "/usr/local/cuda/lib64/libcudnn.so.9",
-            "/usr/local/cuda/lib64/libcusparseLt.so.0",
-            "/usr/local/cuda/lib64/libcurand.so.10",
-            "/usr/local/cuda/lib64/libnccl.so.2",
-            "/usr/local/cuda/lib64/libnvshmem_host.so.3",
-            "/usr/local/cuda/lib64/libcudnn_adv.so.9",
-            "/usr/local/cuda/lib64/libcudnn_cnn.so.9",
-            "/usr/local/cuda/lib64/libcudnn_graph.so.9",
-            "/usr/local/cuda/lib64/libcudnn_ops.so.9",
-            "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9",
-            "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9",
-            "/usr/local/cuda/lib64/libcudnn_heuristic.so.9",
-            "/usr/local/cuda/lib64/libcufile.so.0",
-            "/usr/local/cuda/lib64/libcufile_rdma.so.1",
-            "/usr/local/cuda/lib64/libcusparse.so.12",
-        ]
-
-        # CUDA version-specific libraries
-        if "13" in desired_cuda:
-            minor_version = desired_cuda[-1]
-            version_specific_libs = [
-                "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13",
-                "/usr/local/cuda/lib64/libcublas.so.13",
-                "/usr/local/cuda/lib64/libcublasLt.so.13",
-                "/usr/local/cuda/lib64/libcudart.so.13",
-                "/usr/local/cuda/lib64/libcufft.so.12",
-                "/usr/local/cuda/lib64/libcusolver.so.12",
-                "/usr/local/cuda/lib64/libnvJitLink.so.13",
-                "/usr/local/cuda/lib64/libnvrtc.so.13",
-                f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}",
-            ]
-        elif "12" in desired_cuda:
-            # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9")
-            minor_version = desired_cuda[-1]
-            version_specific_libs = [
-                "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12",
-                "/usr/local/cuda/lib64/libcublas.so.12",
-                "/usr/local/cuda/lib64/libcublasLt.so.12",
-                "/usr/local/cuda/lib64/libcudart.so.12",
-                "/usr/local/cuda/lib64/libcufft.so.11",
-                "/usr/local/cuda/lib64/libcusolver.so.11",
-                "/usr/local/cuda/lib64/libnvJitLink.so.12",
-                "/usr/local/cuda/lib64/libnvrtc.so.12",
-                f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}",
-            ]
-        else:
-            raise ValueError(f"Unsupported CUDA version: {desired_cuda}.")
-
-        # Combine all libraries
-        libs_to_copy = common_libs + version_specific_libs
-
-        # Copy libraries to unzipped_folder/torch/lib
-        for lib_path in libs_to_copy:
-            copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda)
-
-    # Make sure the wheel is tagged with manylinux_2_28
-    for f in os.scandir(f"{folder}/tmp/"):
-        if f.is_dir() and f.name.endswith(".dist-info"):
-            replace_tag(f"{f.path}/WHEEL")
-            break
-
-    os.system(f"wheel pack {folder}/tmp/ -d {folder}")
-    os.system(f"rm -rf {folder}/tmp/")
-
-
-def complete_wheel(folder: str) -> str:
-    """
-    Complete wheel build and put in artifact location
-    """
-    wheel_name = list_dir(f"/{folder}/dist")[0]
-
-    # Please note for cuda we don't run auditwheel since we use custom script to package
-    # the cuda dependencies to the wheel file using update_wheel() method.
-    # However we need to make sure filename reflects the correct Manylinux platform.
-    if "pytorch" in folder and not enable_cuda:
-        print("Repairing Wheel with AuditWheel")
-        check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder)
-        repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0]
-
-        print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist")
-        os.rename(
-            f"/{folder}/wheelhouse/{repaired_wheel_name}",
-            f"/{folder}/dist/{repaired_wheel_name}",
-        )
-    else:
-        repaired_wheel_name = list_dir(f"/{folder}/dist")[0]
-
-    print(f"Copying {repaired_wheel_name} to artifacts")
-    shutil.copy2(
-        f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}"
-    )
-
-    return repaired_wheel_name
-
-
-def parse_arguments():
-    """
-    Parse inline arguments
-    """
-    from argparse import ArgumentParser
-
-    parser = ArgumentParser("AARCH64 wheels python CD")
-    parser.add_argument("--debug", action="store_true")
-    parser.add_argument("--build-only", action="store_true")
-    parser.add_argument("--test-only", type=str)
-    parser.add_argument("--enable-mkldnn", action="store_true")
-    parser.add_argument("--enable-cuda", action="store_true")
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    """
-    Entry Point
-    """
-    args = parse_arguments()
-    enable_mkldnn = args.enable_mkldnn
-    enable_cuda = args.enable_cuda
-    branch = check_output(
-        ["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch"
-    ).decode()
-
-    print("Building PyTorch wheel")
-    build_vars = ""
-    # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
-    if enable_cuda:
-        build_vars += "MAX_JOBS=5 "
-
-        # Handle PyPI NVIDIA libraries vs bundled libraries
-        use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1"
-        if use_nvidia_pypi_libs:
-            print("Configuring build for PyPI NVIDIA libraries")
-            # Configure for dynamic linking (matching x86 logic)
-            build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 "
-        else:
-            print("Configuring build for bundled NVIDIA libraries")
-            # Keep existing static linking approach - already configured above
-
-    override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION")
-    desired_cuda = os.getenv("DESIRED_CUDA")
-    if override_package_version is not None:
-        version = override_package_version
-        build_vars += (
-            f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 "
-        )
-    elif branch in ["nightly", "main"]:
-        build_date = (
-            check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch")
-            .decode()
-            .replace("-", "")
-        )
-        version = (
-            check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2]
-        )
-        if enable_cuda:
-            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 "
-        else:
-            build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 "
-    elif branch.startswith(("v1.", "v2.")):
-        build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 "
-
-    if enable_mkldnn:
-        build_ArmComputeLibrary()
-        print("build pytorch with mkldnn+acl backend")
-        build_vars += (
-            "USE_MKLDNN=ON USE_MKLDNN_ACL=ON "
-            "ACL_ROOT_DIR=/acl "
-            "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH "
-            "ACL_INCLUDE_DIR=/acl/build "
-            "ACL_LIBRARY=/acl/build "
-        )
-        if enable_cuda:
-            build_vars += "BLAS=NVPL "
-        else:
-            build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS "
-    else:
-        print("build pytorch without mkldnn backend")
-
-    os.system(f"cd /pytorch; {build_vars} python3 setup.py bdist_wheel")
-    if enable_cuda:
-        print("Updating Cuda Dependency")
-        filename = os.listdir("/pytorch/dist/")
-        wheel_path = f"/pytorch/dist/{filename[0]}"
-        package_cuda_wheel(wheel_path, desired_cuda)
-    pytorch_wheel_name = complete_wheel("/pytorch/")
-    print(f"Build Complete. Created {pytorch_wheel_name}..")
--- a/.ci/aarch64_linux/build_aarch64_wheel.py
+++ b/.ci/aarch64_linux/build_aarch64_wheel.py
--- a/.ci/aarch64_linux/embed_library.py
+++ b/.ci/aarch64_linux/embed_library.py
@@ -1,87 +0,0 @@
-#!/usr/bin/env python3
-
-import os
-import shutil
-import sys
-from subprocess import check_call
-from tempfile import TemporaryDirectory
-
-from auditwheel.elfutils import elf_file_filter
-from auditwheel.lddtree import lddtree
-from auditwheel.patcher import Patchelf
-from auditwheel.repair import copylib
-from auditwheel.wheeltools import InWheelCtx
-
-
-def replace_tag(filename):
-    with open(filename) as f:
-        lines = f.read().split("\\n")
-    for i, line in enumerate(lines):
-        if not line.startswith("Tag: "):
-            continue
-        lines[i] = line.replace("-linux_", "-manylinux2014_")
-        print(f"Updated tag from {line} to {lines[i]}")
-
-    with open(filename, "w") as f:
-        f.write("\\n".join(lines))
-
-
-class AlignedPatchelf(Patchelf):
-    def set_soname(self, file_name: str, new_soname: str) -> None:
-        check_call(
-            ["patchelf", "--page-size", "65536", "--set-soname", new_soname, file_name]
-        )
-
-    def replace_needed(self, file_name: str, soname: str, new_soname: str) -> None:
-        check_call(
-            [
-                "patchelf",
-                "--page-size",
-                "65536",
-                "--replace-needed",
-                soname,
-                new_soname,
-                file_name,
-            ]
-        )
-
-
-def embed_library(whl_path, lib_soname, update_tag=False):
-    patcher = AlignedPatchelf()
-    out_dir = TemporaryDirectory()
-    whl_name = os.path.basename(whl_path)
-    tmp_whl_name = os.path.join(out_dir.name, whl_name)
-    with InWheelCtx(whl_path) as ctx:
-        torchlib_path = os.path.join(ctx._tmpdir.name, "torch", "lib")
-        ctx.out_wheel = tmp_whl_name
-        new_lib_path, new_lib_soname = None, None
-        for filename, _ in elf_file_filter(ctx.iter_files()):
-            if not filename.startswith("torch/lib"):
-                continue
-            libtree = lddtree(filename)
-            if lib_soname not in libtree["needed"]:
-                continue
-            lib_path = libtree["libs"][lib_soname]["path"]
-            if lib_path is None:
-                print(f"Can't embed {lib_soname} as it could not be found")
-                break
-            if lib_path.startswith(torchlib_path):
-                continue
-
-            if new_lib_path is None:
-                new_lib_soname, new_lib_path = copylib(lib_path, torchlib_path, patcher)
-            patcher.replace_needed(filename, lib_soname, new_lib_soname)
-            print(f"Replacing {lib_soname} with {new_lib_soname} for {filename}")
-        if update_tag:
-            # Add manylinux2014 tag
-            for filename in ctx.iter_files():
-                if os.path.basename(filename) != "WHEEL":
-                    continue
-                replace_tag(filename)
-    shutil.move(tmp_whl_name, whl_path)
-
-
-if __name__ == "__main__":
-    embed_library(
-        sys.argv[1], "libgomp.so.1", len(sys.argv) > 2 and sys.argv[2] == "--update-tag"
-    )
--- a/.ci/caffe2/README.md
+++ b/.ci/caffe2/README.md
@@ -1,12 +0,0 @@
-# Jenkins
-
-The scripts in this directory are the entrypoint for testing Caffe2.
-
-The environment variable `BUILD_ENVIRONMENT` is expected to be set to
-the build environment you intend to test. It is a hint for the build
-and test scripts to configure Caffe2 a certain way and include/exclude
-tests. Docker images, they equal the name of the image itself. For
-example: `py2-cuda9.0-cudnn7-ubuntu16.04`. The Docker images that are
-built on Jenkins and are used in triggered builds already have this
-environment variable set in their manifest. Also see
-`./docker/jenkins/*/Dockerfile` and search for `BUILD_ENVIRONMENT`.
--- a/.ci/caffe2/common.sh
+++ b/.ci/caffe2/common.sh
@@ -1,36 +0,0 @@
-set -ex
-
-LOCAL_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)
-ROOT_DIR=$(cd "$LOCAL_DIR"/../.. && pwd)
-TEST_DIR="$ROOT_DIR/test"
-gtest_reports_dir="${TEST_DIR}/test-reports/cpp"
-pytest_reports_dir="${TEST_DIR}/test-reports/python"
-
-# Figure out which Python to use
-PYTHON="$(which python)"
-if [[ "${BUILD_ENVIRONMENT}" =~ py((2|3)\.?[0-9]?\.?[0-9]?) ]]; then
-  PYTHON=$(which "python${BASH_REMATCH[1]}")
-fi
-
-if [[ "${BUILD_ENVIRONMENT}" == *rocm* ]]; then
-    # HIP_PLATFORM is auto-detected by hipcc; unset to avoid build errors
-    unset HIP_PLATFORM
-    if which sccache > /dev/null; then
-        # Save sccache logs to file
-        sccache --stop-server || true
-        rm -f ~/sccache_error.log || true
-        SCCACHE_ERROR_LOG=~/sccache_error.log SCCACHE_IDLE_TIMEOUT=0 sccache --start-server
-
-        # Report sccache stats for easier debugging
-        sccache --zero-stats
-    fi
-fi
-
-# /usr/local/caffe2 is where the cpp bits are installed to in cmake-only
-# builds. In +python builds the cpp tests are copied to /usr/local/caffe2 so
-# that the test code in .ci/test.sh is the same
-INSTALL_PREFIX="/usr/local/caffe2"
-
-mkdir -p "$gtest_reports_dir" || true
-mkdir -p "$pytest_reports_dir" || true
-mkdir -p "$INSTALL_PREFIX" || true
--- a/.ci/caffe2/test.sh
+++ b/.ci/caffe2/test.sh
@ -1,168 +0,0 @@
-#!/bin/bash
-
-# shellcheck source=./common.sh
-source "$(dirname "${BASH_SOURCE[0]}")/common.sh"
-
-if [[ ${BUILD_ENVIRONMENT} == *onnx* ]]; then
-  pip install click mock tabulate networkx==2.0
-  pip -q install "file:///var/lib/jenkins/workspace/third_party/onnx#egg=onnx"
-fi
-
-# Skip tests in environments where they are not built/applicable
-if [[ "${BUILD_ENVIRONMENT}" == *-android* ]]; then
-  echo 'Skipping tests'
-  exit 0
-fi
-# These additional packages are needed for circleci ROCm builds.
-if [[ $BUILD_ENVIRONMENT == *rocm* ]]; then
-    # Need networkx 2.0 because bellman_ford was moved in 2.1. Scikit-image by
-    # default installs the most recent networkx version, so we install this lower
-    # version explicitly before scikit-image pulls it in as a dependency
-    pip install networkx==2.0
-    # click - onnx
-    pip install --progress-bar off click protobuf tabulate virtualenv mock typing-extensions
-fi
-
-# Find where cpp tests and Caffe2 itself are installed
-if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
-  # For cmake only build we install everything into /usr/local
-  cpp_test_dir="$INSTALL_PREFIX/cpp_test"
-  ld_library_path="$INSTALL_PREFIX/lib"
-else
-  # For Python builds we install into python
-  # cd to /usr first so the python import doesn't get confused by any 'caffe2'
-  # directory in cwd
-  python_installation="$(dirname $(dirname $(cd /usr && $PYTHON -c 'import os; import caffe2; print(os.path.realpath(caffe2.__file__))')))"
-  caffe2_pypath="$python_installation/caffe2"
-  cpp_test_dir="$python_installation/torch/test"
-  ld_library_path="$python_installation/torch/lib"
-fi
-
-################################################################################
-# C++ tests #
-################################################################################
-# Only run cpp tests in the first shard, don't run cpp tests a second time in the second shard
-if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
-  echo "Running C++ tests.."
-  for test in $(find "$cpp_test_dir" -executable -type f); do
-    case "$test" in
-      # skip tests we know are hanging or bad
-      */mkl_utils_test|*/aten/integer_divider_test)
-        continue
-        ;;
-      */scalar_tensor_test|*/basic|*/native_test)
-        if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
-          continue
-        else
-          LD_LIBRARY_PATH="$ld_library_path" "$test"
-        fi
-        ;;
-      */*_benchmark)
-        LD_LIBRARY_PATH="$ld_library_path" "$test" --benchmark_color=false
-        ;;
-      *)
-        # Currently, we use a mixture of gtest (caffe2) and Catch2 (ATen). While
-        # planning to migrate to gtest as the common PyTorch c++ test suite, we
-        # currently do NOT use the xml test reporter, because Catch doesn't
-        # support multiple reporters
-        # c.f. https://github.com/catchorg/Catch2/blob/master/docs/release-notes.md#223
-        # which means that enabling XML output means you lose useful stdout
-        # output for Jenkins.  It's more important to have useful console
-        # output than it is to have XML output for Jenkins.
-        # Note: in the future, if we want to use xml test reporter once we switch
-        # to all gtest, one can simply do:
-        LD_LIBRARY_PATH="$ld_library_path" \
-            "$test" --gtest_output=xml:"$gtest_reports_dir/$(basename $test).xml"
-        ;;
-    esac
-  done
-fi
-
-################################################################################
-# Python tests #
-################################################################################
-if [[ "$BUILD_ENVIRONMENT" == *cmake* ]]; then
-  exit 0
-fi
-
-# If pip is installed as root, we must use sudo.
-# CircleCI docker images could install conda as jenkins user, or use the OS's python package.
-PIP=$(which pip)
-PIP_USER=$(stat --format '%U' $PIP)
-CURRENT_USER=$(id -u -n)
-if [[ "$PIP_USER" = root && "$CURRENT_USER" != root ]]; then
-  MAYBE_SUDO=sudo
-fi
-
-# Uninstall pre-installed hypothesis and coverage to use an older version as newer
-# versions remove the timeout parameter from settings which ideep/conv_transpose_test.py uses
-$MAYBE_SUDO pip -q uninstall -y hypothesis
-$MAYBE_SUDO pip -q uninstall -y coverage
-
-# "pip install hypothesis==3.44.6" from official server is unreliable on
-# CircleCI, so we host a copy on S3 instead
-$MAYBE_SUDO pip -q install attrs==18.1.0 -f https://s3.amazonaws.com/ossci-linux/wheels/attrs-18.1.0-py2.py3-none-any.whl
-$MAYBE_SUDO pip -q install coverage==4.5.1 -f https://s3.amazonaws.com/ossci-linux/wheels/coverage-4.5.1-cp36-cp36m-macosx_10_12_x86_64.whl
-$MAYBE_SUDO pip -q install hypothesis==3.44.6 -f https://s3.amazonaws.com/ossci-linux/wheels/hypothesis-3.44.6-py3-none-any.whl
-
-# Collect additional tests to run (outside caffe2/python)
-EXTRA_TESTS=()
-
-# CUDA builds always include NCCL support
-if [[ "$BUILD_ENVIRONMENT" == *-cuda* ]] || [[ "$BUILD_ENVIRONMENT" == *-rocm* ]]; then
-  EXTRA_TESTS+=("$caffe2_pypath/contrib/nccl")
-fi
-
-rocm_ignore_test=()
-if [[ $BUILD_ENVIRONMENT == *-rocm* ]]; then
-  # Currently these tests are failing on ROCM platform:
-
-  # On ROCm, RCCL (distributed) development isn't complete.
-  # https://github.com/ROCmSoftwarePlatform/rccl
-  rocm_ignore_test+=("--ignore $caffe2_pypath/python/data_parallel_model_test.py")
-
-  # This test has been flaky in ROCm CI (but note the tests are
-  # cpu-only so should be unrelated to ROCm)
-  rocm_ignore_test+=("--ignore $caffe2_pypath/python/operator_test/blobs_queue_db_test.py")
-  # This test is skipped on Jenkins(compiled without MKL) and otherwise known flaky
-  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/convfusion_op_test.py")
-  # This test is skipped on Jenkins(compiled without MKL) and causing segfault on Circle
-  rocm_ignore_test+=("--ignore $caffe2_pypath/python/ideep/pool_op_test.py")
-fi
-
-echo "Running Python tests.."
-# locale setting is required by click package
-for loc in "en_US.utf8" "C.UTF-8"; do
-  if locale -a | grep "$loc" >/dev/null 2>&1; then
-    export LC_ALL="$loc"
-    export LANG="$loc"
-    break;
-  fi
-done
-
-# Some Caffe2 tests fail when run using AVX512 ISA, see https://github.com/pytorch/pytorch/issues/66111
-export DNNL_MAX_CPU_ISA=AVX2
-
-# Should still run even in the absence of SHARD_NUMBER
-if [[ "${SHARD_NUMBER:-1}" == "1" ]]; then
-  # TODO(sdym@meta.com) remove this when the linked issue resolved.
-  # py is temporary until https://github.com/Teemu/pytest-sugar/issues/241 is fixed
-  pip install py==1.11.0
-  pip install pytest-sugar
-  # NB: Warnings are disabled because they make it harder to see what
-  # the actual erroring test is
-  "$PYTHON" \
-    -m pytest \
-    -x \
-    -v \
-    --disable-warnings \
-    --junit-xml="$pytest_reports_dir/result.xml" \
-    --ignore "$caffe2_pypath/python/test/executor_test.py" \
-    --ignore "$caffe2_pypath/python/operator_test/matmul_op_test.py" \
-    --ignore "$caffe2_pypath/python/operator_test/pack_ops_test.py" \
-    --ignore "$caffe2_pypath/python/mkl/mkl_sbn_speed_test.py" \
-    --ignore "$caffe2_pypath/python/trt/test_pt_onnx_trt.py" \
-    ${rocm_ignore_test[@]} \
-    "$caffe2_pypath/python" \
-    "${EXTRA_TESTS[@]}"
-fi
--- a/.ci/docker/README.md
+++ b/.ci/docker/README.md
@ -1,139 +0,0 @@
-# Docker images for GitHub CI and CD
-
-This directory contains everything needed to build the Docker images
-that are used in our CI.
-
-The Dockerfiles located in subdirectories are parameterized to
-conditionally run build stages depending on build arguments passed to
-`docker build`. This lets us use only a few Dockerfiles for many
-images. The different configurations are identified by a freeform
-string that we call a _build environment_. This string is persisted in
-each image as the `BUILD_ENVIRONMENT` environment variable.
-
-See `build.sh` for valid build environments (it's the giant switch).
-
-## Docker CI builds
-
-* `build.sh` -- dispatch script to launch all builds
-* `common` -- scripts used to execute individual Docker build stages
-* `ubuntu` -- Dockerfile for Ubuntu image for CPU build and test jobs
-* `ubuntu-cuda` -- Dockerfile for Ubuntu image with CUDA support for nvidia-docker
-* `ubuntu-rocm` -- Dockerfile for Ubuntu image with ROCm support
-* `ubuntu-xpu` -- Dockerfile for Ubuntu image with XPU support
-
-### Docker CD builds
-
-* `conda` - Dockerfile and build.sh to build Docker images used in nightly conda builds
-* `manywheel` - Dockerfile and build.sh to build Docker images used in nightly manywheel builds
-* `libtorch` - Dockerfile and build.sh to build Docker images used in nightly libtorch builds
-
-## Usage
-
-```bash
-# Build a specific image
-./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest
-
-# Set flags (see build.sh) and build image
-sudo bash -c 'TRITON=1 ./build.sh pytorch-linux-bionic-py3.8-gcc9 -t myimage:latest'
-```
-
-## [Guidance] Adding a New Base Docker Image
-
-### Background
-
-The base Docker images in directory `.ci/docker/` are built by the `docker-builds.yml` workflow. Those images are used throughout the PyTorch CI/CD pipeline. You should only create or modify a base Docker image if you need specific environment changes or dependencies before building PyTorch on CI.
-
-1. **Automatic Rebuilding**:
-   - The Docker image building process is triggered automatically when changes are made to files in the `.ci/docker/*` directory
-   - This ensures all images stay up-to-date with the latest dependencies and configurations
-
-2. **Image Reuse in PyTorch Build Workflows** (example: linux-build):
-   - The images generated by `docker-builds.yml` are reused in `_linux-build.yml` through the `calculate-docker-image` step
-   - The `_linux-build.yml` workflow:
-     - Pulls the Docker image determined by the `calculate-docker-image` step
-     - Runs a Docker container with that image
-     - Executes `.ci/pytorch/build.sh` inside the container to build PyTorch
-
-3. **Usage in Test Workflows** (example: linux-test):
-   - The same Docker images are also used in `_linux-test.yml` for running tests
-   - The `_linux-test.yml` workflow follows a similar pattern:
-     - It uses the `calculate-docker-image` step to determine which Docker image to use
-     - It pulls the Docker image and runs a container with that image
-     - It installs the wheels from the artifacts generated by PyTorch build jobs
-     - It executes test scripts (like `.ci/pytorch/test.sh` or `.ci/pytorch/multigpu-test.sh`) inside the container
-
-### Understanding File Purposes
-
-#### `.ci/docker/build.sh` vs `.ci/pytorch/build.sh`
- **`.ci/docker/build.sh`**:
-  - Used for building base Docker images
-  - Executed by the `docker-builds.yml` workflow to pre-build Docker images for CI
-  - Contains configurations for different Docker build environments
-
- **`.ci/pytorch/build.sh`**:
-  - Used for building PyTorch inside a Docker container
-  - Called by workflows like `_linux-build.yml` after the Docker container is started
-  - Builds PyTorch wheels and other artifacts
-
-#### `.ci/docker/ci_commit_pins/` vs `.github/ci_commit_pins`
- **`.ci/docker/ci_commit_pins/`**:
-  - Used for pinning dependency versions during base Docker image building
-  - Ensures consistent environments for building PyTorch
-  - Changes here trigger base Docker image rebuilds
-
- **`.github/ci_commit_pins`**:
-  - Used for pinning dependency versions during PyTorch building and tests
-  - Ensures consistent dependencies for PyTorch across different builds
-  - Used by build scripts running inside Docker containers
-
-### Step-by-Step Guide for Adding a New Base Docker Image
-
-#### 1. Add Pinned Commits (If Applicable)
-
-We use pinned commits for build stability. The `nightly.yml` workflow checks and updates pinned commits for certain repository dependencies daily.
-
-If your new Docker image needs a library installed from a specific pinned commit or built from source:
-
-1. Add the repository you want to track in `nightly.yml` and `merge-rules.yml`
-2. Add the initial pinned commit in `.ci/docker/ci_commit_pins/`. The text filename should match the one defined in step 1
-
-#### 2. Configure the Base Docker Image
-1. **Add new Base Docker image configuration** (if applicable):
-
-   Add the configuration in `.ci/docker/build.sh`. For example:
-   ```bash
-   pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-new1)
-     CUDA_VERSION=12.8.1
-     ANACONDA_PYTHON_VERSION=3.12
-     GCC_VERSION=11
-     VISION=yes
-     KATEX=yes
-     UCX_COMMIT=${_UCX_COMMIT}
-     UCC_COMMIT=${_UCC_COMMIT}
-     TRITON=yes
-     NEW_ARG_1=yes
-     ;;
-   ```
-
-2. **Add build arguments to Docker build command**:
-
-   If you're introducing a new argument to the Docker build, make sure to add it in the Docker build step in `.ci/docker/build.sh`:
-   ```bash
-   docker build \
-     ....
-     --build-arg "NEW_ARG_1=${NEW_ARG_1}"
-   ```
-
-3. **Update Dockerfile logic**:
-
-   Update the Dockerfile to use the new argument. For example, in `ubuntu/Dockerfile`:
-   ```dockerfile
-   ARG NEW_ARG_1
-   # Set up environment for NEW_ARG_1
-   RUN if [ -n "${NEW_ARG_1}" ]; then bash ./do_something.sh; fi
-   ```
-
-4. **Add the Docker configuration** in `.github/workflows/docker-builds.yml`:
-
-   The `docker-builds.yml` workflow pre-builds the Docker images whenever changes occur in the `.ci/docker/` directory. This includes the
-   pinned commit updates.
--- a/.ci/docker/almalinux/Dockerfile
+++ b/.ci/docker/almalinux/Dockerfile
@ -1,106 +0,0 @@
-ARG CUDA_VERSION=12.6
-ARG BASE_TARGET=cuda${CUDA_VERSION}
-ARG ROCM_IMAGE=rocm/dev-almalinux-8:6.3-complete
-FROM amd64/almalinux:8.10-20250519 as base
-
-ENV LC_ALL en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
-
-ARG DEVTOOLSET_VERSION=11
-
-RUN yum -y update
-RUN yum -y install epel-release
-# Install glibc-langpack-en to make sure the en_US.UTF-8 locale is available
-RUN yum -y install glibc-langpack-en
-RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel openssl-devel yum-utils autoconf automake make gcc-toolset-${DEVTOOLSET_VERSION}-toolchain
-# Just add everything as a safe.directory for git since these will be used in multiple places with git
-RUN git config --global --add safe.directory '*'
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-
-# cmake-3.18.4 from pip
-RUN yum install -y python3-pip && \
-    python3 -mpip install cmake==3.18.4 && \
-    ln -s /usr/local/bin/cmake /usr/bin/cmake3
-RUN rm -rf /usr/local/cuda-*
-
-FROM base as openssl
-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh && rm install_openssl.sh
-
-FROM base as patchelf
-# Install patchelf
-ADD ./common/install_patchelf.sh install_patchelf.sh
-RUN bash ./install_patchelf.sh && rm install_patchelf.sh && cp $(which patchelf) /patchelf
-
-FROM base as conda
-# Install Anaconda
-ADD ./common/install_conda_docker.sh install_conda.sh
-RUN bash ./install_conda.sh && rm install_conda.sh
-
-# Install CUDA
-FROM base as cuda
-ARG CUDA_VERSION=12.6
-RUN rm -rf /usr/local/cuda-*
-ADD ./common/install_cuda.sh install_cuda.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
-# Preserve CUDA_VERSION for the builds
-ENV CUDA_VERSION=${CUDA_VERSION}
-# Make things in our path by default
-ENV PATH=/usr/local/cuda-${CUDA_VERSION}/bin:$PATH
-
-FROM cuda as cuda12.6
-RUN bash ./install_cuda.sh 12.6
-ENV DESIRED_CUDA=12.6
-
-FROM cuda as cuda12.8
-RUN bash ./install_cuda.sh 12.8
-ENV DESIRED_CUDA=12.8
-
-FROM cuda as cuda12.9
-RUN bash ./install_cuda.sh 12.9
-ENV DESIRED_CUDA=12.9
-
-FROM cuda as cuda13.0
-RUN bash ./install_cuda.sh 13.0
-ENV DESIRED_CUDA=13.0
-
-FROM ${ROCM_IMAGE} as rocm
-ENV PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
-ADD ./common/install_mkl.sh install_mkl.sh
-RUN bash ./install_mkl.sh && rm install_mkl.sh
-ENV MKLROOT /opt/intel
-
-# Install MNIST test data
-FROM base as mnist
-ADD ./common/install_mnist.sh install_mnist.sh
-RUN bash ./install_mnist.sh
-
-FROM base as all_cuda
-COPY --from=cuda12.6  /usr/local/cuda-12.6 /usr/local/cuda-12.6
-COPY --from=cuda12.8  /usr/local/cuda-12.8 /usr/local/cuda-12.8
-COPY --from=cuda12.9  /usr/local/cuda-12.9 /usr/local/cuda-12.9
-COPY --from=cuda13.0  /usr/local/cuda-13.0 /usr/local/cuda-13.0
-
-# Final step
-FROM ${BASE_TARGET} as final
-COPY --from=openssl            /opt/openssl           /opt/openssl
-COPY --from=patchelf           /patchelf              /usr/local/bin/patchelf
-COPY --from=conda              /opt/conda             /opt/conda
-
-# Add jni.h for java host build.
-COPY ./common/install_jni.sh install_jni.sh
-COPY ./java/jni.h jni.h
-RUN bash ./install_jni.sh && rm install_jni.sh
-
-ENV PATH /opt/conda/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-COPY --from=mnist  /usr/local/mnist /usr/local/mnist
-RUN rm -rf /usr/local/cuda
-RUN chmod o+rw /usr/local
-RUN touch /.condarc && \
-    chmod o+rw /.condarc && \
-    chmod -R o+rw /opt/conda
--- a/.ci/docker/almalinux/build.sh
+++ b/.ci/docker/almalinux/build.sh
@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-# Script used only in CD pipeline
-
-set -exou pipefail
-
-# First positional argument is IMAGENAME:ARCHTAG; any remaining arguments are
-# forwarded to `docker build` via $@.
-# Use a default expansion because `set -u` is active: a bare "$1" with no
-# arguments aborts with "unbound variable" before the usage text is printed.
-image="${1:-}"
-
-if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGENAME:ARCHTAG"
-  exit 1
-fi
-# Shift only after validation: `shift` on an empty argument list returns
-# non-zero, which `set -e` would turn into a silent exit.
-shift
-
-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-CUDA_VERSION=""
-ROCM_VERSION=""
-EXTRA_BUILD_ARGS=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name and tag.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    CUDA_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-    EXTRA_BUILD_ARGS="--build-arg CUDA_VERSION=${CUDA_VERSION}"
-elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
-    # extract rocm version from image name and tag.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
-    ROCM_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
-    EXTRA_BUILD_ARGS="--build-arg ROCM_IMAGE=rocm/dev-almalinux-8:${ROCM_VERSION}-complete"
-fi
-
-case ${DOCKER_TAG_PREFIX} in
-  cpu)
-    BASE_TARGET=base
-    ;;
-  cuda*)
-    BASE_TARGET=cuda${CUDA_VERSION}
-    ;;
-  rocm*)
-    BASE_TARGET=rocm
-    ;;
-  *)
-    echo "ERROR: Unknown docker tag ${DOCKER_TAG_PREFIX}"
-    exit 1
-    ;;
-esac
-
-# TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-# is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-sudo systemctl daemon-reload
-sudo systemctl restart docker
-
-export DOCKER_BUILDKIT=1
-TOPDIR=$(git rev-parse --show-toplevel)
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-docker build \
-  --target final \
-  --progress plain \
-  --build-arg "BASE_TARGET=${BASE_TARGET}" \
-  --build-arg "DEVTOOLSET_VERSION=11" \
-  ${EXTRA_BUILD_ARGS} \
-  -t ${tmp_tag} \
-  $@ \
-  -f "${TOPDIR}/.ci/docker/almalinux/Dockerfile" \
-  ${TOPDIR}/.ci/docker/
-
-if [ -n "${CUDA_VERSION}" ]; then
-  # Test that we're using the right CUDA compiler
-  docker run --rm "${tmp_tag}" nvcc --version | grep "cuda_${CUDA_VERSION}"
-fi
--- a/.ci/docker/build.sh
+++ b/.ci/docker/build.sh
@ -1,463 +0,0 @@
-#!/bin/bash
-# The purpose of this script is to:
-# 1. Extract the set of parameters to be used for a docker build based on the provided image name.
-# 2. Run docker build with the parameters found in step 1.
-# 3. Run the built image and print out the expected and actual versions of packages installed.
-
-set -ex
-
-# First positional argument is the image name; any remaining arguments are
-# forwarded to `docker build` via "$@".
-image="${1:-}"
-
-# Validate before shifting: with `set -e`, `shift` on an empty argument list
-# returns non-zero and would abort the script before the usage text is shown.
-if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGE"
-  exit 1
-fi
-shift
-
-# Parse a version number out of the global $image and export it.
-#   $1 - prefix to search for in the image name (e.g. "cuda", "gcc")
-#   $2 - name of the variable that receives the matched version
-# The version is one to three dot-separated numeric components that
-# immediately follow the prefix. Exits with status 1 if nothing was matched.
-function extract_version_from_image_name() {
-  # perl prints capture group 1 (the version); eval performs the export into
-  # the variable whose name is held in $2.
-  eval export $2=$(echo "${image}" | perl -n -e"/$1(\d+(\.\d+)?(\.\d+)?)/ && print \$1")
-  # ${!2} is the indirect expansion of the variable named by $2; an empty
-  # value means the pattern did not match.
-  if [ "x${!2}" = x ]; then
-    echo "variable '$2' not correctly parsed from image='$image'"
-    exit 1
-  fi
-}
-
-# Derive a <NAME>_VERSION variable for every versioned field of the global
-# $image by delegating to extract_version_from_image_name.
-# NOTE(review): no caller is visible in this part of the script — confirm
-# whether this helper is still used.
-function extract_all_from_image_name() {
-  # split $image into an array on '-', restoring IFS afterwards
-  keep_IFS="$IFS"
-  IFS="-"
-  declare -a parts=($image)
-  IFS="$keep_IFS"
-  unset keep_IFS
-
-  for part in "${parts[@]}"; do
-    # name is the alphabetic run directly preceding a version number; it is
-    # empty when the field carries no version
-    name=$(echo "${part}" | perl -n -e"/([a-zA-Z]+)\d+(\.\d+)?(\.\d+)?/ && print \$1")
-    # ${name^^} upper-cases the field name, e.g. "gcc" -> "GCC_VERSION"
-    vername="${name^^}_VERSION"
-    # "py" is the odd one out, needs this special case
-    if [ "x${name}" = xpy ]; then
-      vername=ANACONDA_PYTHON_VERSION
-    fi
-    # skip non-conforming fields such as "pytorch", "linux" or "bionic" without version string
-    if [ -n "${name}" ]; then
-      extract_version_from_image_name "${name}" "${vername}"
-    fi
-  done
-}
-
-# Use the same pre-built XLA test image from PyTorch/XLA
-if [[ "$image" == *xla* ]]; then
-  echo "Using pre-built XLA test image..."
-  exit 0
-fi
-
-if [[ "$image" == *-jammy* ]]; then
-  UBUNTU_VERSION=22.04
-elif [[ "$image" == *-noble* ]]; then
-  UBUNTU_VERSION=24.04
-elif [[ "$image" == *ubuntu* ]]; then
-  extract_version_from_image_name ubuntu UBUNTU_VERSION
-fi
-
-if [ -n "${UBUNTU_VERSION}" ]; then
-  OS="ubuntu"
-else
-  echo "Unable to derive operating system base..."
-  exit 1
-fi
-
-DOCKERFILE="${OS}/Dockerfile"
-if [[ "$image" == *rocm* ]]; then
-  DOCKERFILE="${OS}-rocm/Dockerfile"
-elif [[ "$image" == *xpu* ]]; then
-  DOCKERFILE="${OS}-xpu/Dockerfile"
-elif [[ "$image" == *cuda*linter* ]]; then
-  # Use a separate Dockerfile for linter to keep a small image size
-  DOCKERFILE="linter-cuda/Dockerfile"
-elif [[ "$image" == *linter* ]]; then
-  # Use a separate Dockerfile for linter to keep a small image size
-  DOCKERFILE="linter/Dockerfile"
-elif [[ "$image" == *riscv* ]]; then
-  # Use RISC-V specific Dockerfile
-  DOCKERFILE="ubuntu-cross-riscv/Dockerfile"
-fi
-
-_UCX_COMMIT=7836b165abdbe468a2f607e7254011c07d788152
-_UCC_COMMIT=430e241bf5d38cbc73fc7a6b89155397232e3f96
-if [[ "$image" == *rocm* ]]; then
-  _UCX_COMMIT=cc312eaa4655c0cc5c2bcd796db938f90563bcf6
-  _UCC_COMMIT=0c0fc21559835044ab107199e334f7157d6a0d3d
-fi
-
-tag=$(echo $image | awk -F':' '{print $2}')
-
-# It's annoying to rename jobs every time you want to rewrite a
-# configuration, so we hardcode everything here rather than do it
-# from scratch
-case "$tag" in
-  pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11)
-    CUDA_VERSION=12.4
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc11)
-    CUDA_VERSION=12.8.1
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
-    CUDA_VERSION=13.0.0
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
-    CUDA_VERSION=12.8.1
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    INDUCTOR_BENCHMARKS=yes
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm)
-    CUDA_VERSION=12.8.1
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9)
-    CUDA_VERSION=12.8.1
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=9
-    VISION=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-py3-clang12-onnx)
-    ANACONDA_PYTHON_VERSION=3.10
-    CLANG_VERSION=12
-    VISION=yes
-    ONNX=yes
-    ;;
-  pytorch-linux-jammy-py3.10-clang12)
-    ANACONDA_PYTHON_VERSION=3.10
-    CLANG_VERSION=12
-    VISION=yes
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-rocm-n-py3 | pytorch-linux-jammy-rocm-n-py3-benchmarks | pytorch-linux-noble-rocm-n-py3)
-    if [[ $tag =~ "jammy" ]]; then
-      ANACONDA_PYTHON_VERSION=3.10
-    else
-      ANACONDA_PYTHON_VERSION=3.12
-    fi
-    GCC_VERSION=11
-    VISION=yes
-    ROCM_VERSION=6.4
-    NINJA_VERSION=1.9.0
-    TRITON=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    if [[ $tag =~ "benchmarks" ]]; then
-      INDUCTOR_BENCHMARKS=yes
-    fi
-    ;;
-  pytorch-linux-noble-rocm-alpha-py3)
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    VISION=yes
-    ROCM_VERSION=7.0
-    NINJA_VERSION=1.9.0
-    TRITON=yes
-    KATEX=yes
-    UCX_COMMIT=${_UCX_COMMIT}
-    UCC_COMMIT=${_UCC_COMMIT}
-    PYTORCH_ROCM_ARCH="gfx90a;gfx942;gfx950"
-    ;;
-  pytorch-linux-jammy-xpu-n-1-py3)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    XPU_VERSION=2025.1
-    NINJA_VERSION=1.9.0
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-xpu-n-py3)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    XPU_VERSION=2025.2
-    NINJA_VERSION=1.9.0
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-py3-gcc11-inductor-benchmarks)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    TRITON=yes
-    DOCS=yes
-    INDUCTOR_BENCHMARKS=yes
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-clang12)
-    ANACONDA_PYTHON_VERSION=3.10
-    CUDA_VERSION=12.8.1
-    CLANG_VERSION=12
-    VISION=yes
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-py3-clang18-asan)
-    ANACONDA_PYTHON_VERSION=3.10
-    CLANG_VERSION=18
-    VISION=yes
-    ;;
-  pytorch-linux-jammy-py3.10-gcc11)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    VISION=yes
-    KATEX=yes
-    TRITON=yes
-    DOCS=yes
-    UNINSTALL_DILL=yes
-    ;;
-  pytorch-linux-jammy-py3-clang12-executorch)
-    ANACONDA_PYTHON_VERSION=3.10
-    CLANG_VERSION=12
-    EXECUTORCH=yes
-    ;;
-  pytorch-linux-jammy-py3.12-halide)
-    CUDA_VERSION=12.6
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    HALIDE=yes
-    TRITON=yes
-    ;;
-  pytorch-linux-jammy-py3.12-triton-cpu)
-    CUDA_VERSION=12.6
-    ANACONDA_PYTHON_VERSION=3.12
-    GCC_VERSION=11
-    TRITON_CPU=yes
-    ;;
-  pytorch-linux-jammy-linter)
-    PYTHON_VERSION=3.10
-    ;;
-  pytorch-linux-jammy-cuda12.8-cudnn9-py3.10-linter)
-    PYTHON_VERSION=3.10
-    CUDA_VERSION=12.8.1
-    ;;
-  pytorch-linux-jammy-aarch64-py3.10-gcc11)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    ACL=yes
-    VISION=yes
-    OPENBLAS=yes
-    # snadampal: skipping llvm src build install because the current version
-    # from pytorch/llvm:9.0.1 is x86 specific
-    SKIP_LLVM_SRC_BUILD_INSTALL=yes
-    ;;
-  pytorch-linux-jammy-aarch64-py3.10-gcc11-inductor-benchmarks)
-    ANACONDA_PYTHON_VERSION=3.10
-    GCC_VERSION=11
-    ACL=yes
-    VISION=yes
-    OPENBLAS=yes
-    # snadampal: skipping llvm src build install because the current version
-    # from pytorch/llvm:9.0.1 is x86 specific
-    SKIP_LLVM_SRC_BUILD_INSTALL=yes
-    INDUCTOR_BENCHMARKS=yes
-    ;;
-  pytorch-linux-noble-riscv64-py3.12-gcc14)
-    GCC_VERSION=14
-    ;;
-  *)
-    # Catch-all for builds that are not hardcoded.
-    VISION=yes
-    echo "image '$image' did not match an existing build configuration"
-    if [[ "$image" == *py* ]]; then
-      extract_version_from_image_name py ANACONDA_PYTHON_VERSION
-    fi
-    if [[ "$image" == *cuda* ]]; then
-      extract_version_from_image_name cuda CUDA_VERSION
-    fi
-    if [[ "$image" == *rocm* ]]; then
-      extract_version_from_image_name rocm ROCM_VERSION
-      NINJA_VERSION=1.9.0
-      TRITON=yes
-      # To ensure that any ROCm config will build using conda cmake
-      # and thus have LAPACK/MKL enabled
-      fi
-    if [[ "$image" == *centos7* ]]; then
-      NINJA_VERSION=1.10.2
-    fi
-    if [[ "$image" == *gcc* ]]; then
-      extract_version_from_image_name gcc GCC_VERSION
-    fi
-    if [[ "$image" == *clang* ]]; then
-      extract_version_from_image_name clang CLANG_VERSION
-    fi
-    if [[ "$image" == *devtoolset* ]]; then
-      extract_version_from_image_name devtoolset DEVTOOLSET_VERSION
-    fi
-    if [[ "$image" == *glibc* ]]; then
-      extract_version_from_image_name glibc GLIBC_VERSION
-    fi
-  ;;
-esac
-
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-no_cache_flag=""
-progress_flag=""
-# Do not use cache and progress=plain when in CI
-if [[ -n "${CI:-}" ]]; then
-  no_cache_flag="--no-cache"
-  progress_flag="--progress=plain"
-fi
-
-# Build image
-docker build \
-       ${no_cache_flag} \
-       ${progress_flag} \
-       --build-arg "BUILD_ENVIRONMENT=${image}" \
-       --build-arg "LLVMDEV=${LLVMDEV:-}" \
-       --build-arg "VISION=${VISION:-}" \
-       --build-arg "UBUNTU_VERSION=${UBUNTU_VERSION}" \
-       --build-arg "DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}" \
-       --build-arg "GLIBC_VERSION=${GLIBC_VERSION}" \
-       --build-arg "CLANG_VERSION=${CLANG_VERSION}" \
-       --build-arg "ANACONDA_PYTHON_VERSION=${ANACONDA_PYTHON_VERSION}" \
-       --build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
-       --build-arg "GCC_VERSION=${GCC_VERSION}" \
-       --build-arg "CUDA_VERSION=${CUDA_VERSION}" \
-       --build-arg "NINJA_VERSION=${NINJA_VERSION:-}" \
-       --build-arg "KATEX=${KATEX:-}" \
-       --build-arg "ROCM_VERSION=${ROCM_VERSION:-}" \
-       --build-arg "PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH:-gfx90a;gfx942}" \
-       --build-arg "IMAGE_NAME=${IMAGE_NAME}" \
-       --build-arg "UCX_COMMIT=${UCX_COMMIT}" \
-       --build-arg "UCC_COMMIT=${UCC_COMMIT}" \
-       --build-arg "TRITON=${TRITON}" \
-       --build-arg "TRITON_CPU=${TRITON_CPU}" \
-       --build-arg "ONNX=${ONNX}" \
-       --build-arg "DOCS=${DOCS}" \
-       --build-arg "INDUCTOR_BENCHMARKS=${INDUCTOR_BENCHMARKS}" \
-       --build-arg "EXECUTORCH=${EXECUTORCH}" \
-       --build-arg "HALIDE=${HALIDE}" \
-       --build-arg "XPU_VERSION=${XPU_VERSION}" \
-       --build-arg "UNINSTALL_DILL=${UNINSTALL_DILL}" \
-       --build-arg "ACL=${ACL:-}" \
-       --build-arg "OPENBLAS=${OPENBLAS:-}" \
-       --build-arg "SKIP_SCCACHE_INSTALL=${SKIP_SCCACHE_INSTALL:-}" \
-       --build-arg "SKIP_LLVM_SRC_BUILD_INSTALL=${SKIP_LLVM_SRC_BUILD_INSTALL:-}" \
-       -f $(dirname ${DOCKERFILE})/Dockerfile \
-       -t "$tmp_tag" \
-       "$@" \
-       .
-
-# NVIDIA dockers for RC releases use tag names like `11.0-cudnn9-devel-ubuntu18.04-rc`,
-# for this case we will set UBUNTU_VERSION to `18.04-rc` so that the Dockerfile could
-# find the correct image. As a result, here we have to replace the
-#   "$UBUNTU_VERSION" == "18.04-rc"
-# with
-#   "$UBUNTU_VERSION" == "18.04"
-UBUNTU_VERSION=$(echo ${UBUNTU_VERSION} | sed 's/-rc$//')
-
-# Run the given command in a container created from the freshly built image
-# ($tmp_tag); --rm removes the container once the command exits.
-function drun() {
-  docker run --rm "$tmp_tag" "$@"
-}
-
-if [[ "$OS" == "ubuntu" ]]; then
-
-  if !(drun lsb_release -a 2>&1 | grep -qF Ubuntu); then
-    echo "OS=ubuntu, but:"
-    drun lsb_release -a
-    exit 1
-  fi
-  if !(drun lsb_release -a 2>&1 | grep -qF "$UBUNTU_VERSION"); then
-    echo "UBUNTU_VERSION=$UBUNTU_VERSION, but:"
-    drun lsb_release -a
-    exit 1
-  fi
-fi
-
-if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
-  if !(drun python --version 2>&1 | grep -qF "Python $ANACONDA_PYTHON_VERSION"); then
-    echo "ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION, but:"
-    drun python --version
-    exit 1
-  fi
-fi
-
-if [ -n "$GCC_VERSION" ]; then
-  if [[ "$image" == *riscv* ]]; then
-    # Check RISC-V cross-compilation toolchain version
-    if !(drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
-      echo "RISC-V GCC_VERSION=$GCC_VERSION, but:"
-      drun riscv64-linux-gnu-gcc-${GCC_VERSION} --version
-      exit 1
-    fi
-  elif !(drun gcc --version 2>&1 | grep -q " $GCC_VERSION\\W"); then
-    echo "GCC_VERSION=$GCC_VERSION, but:"
-    drun gcc --version
-    exit 1
-  fi
-fi
-
-if [ -n "$CLANG_VERSION" ]; then
-  if !(drun clang --version 2>&1 | grep -qF "clang version $CLANG_VERSION"); then
-    echo "CLANG_VERSION=$CLANG_VERSION, but:"
-    drun clang --version
-    exit 1
-  fi
-fi
-
-if [ -n "$KATEX" ]; then
-  if !(drun katex --version); then
-    echo "KATEX=$KATEX, but:"
-    drun katex --version
-    exit 1
-  fi
-fi
-
-HAS_TRITON=$(drun python -c "import triton" > /dev/null 2>&1 && echo "yes" || echo "no")
-if [[ -n "$TRITON" || -n "$TRITON_CPU" ]]; then
-  if [ "$HAS_TRITON" = "no" ]; then
-    echo "expecting triton to be installed, but it is not"
-    exit 1
-  fi
-elif [ "$HAS_TRITON" = "yes" ]; then
-  echo "expecting triton to not be installed, but it is"
-  exit 1
-fi
-
-# Sanity check cmake version.  Executorch reinstalls cmake and I'm not sure if
-# they support 4.0.0 yet, so exclude them from this check.
-# Match the "cmake version 4." banner rather than a bare "4." so that outputs
-# such as 3.24.1 or 3.14.0 (which also contain "4.") are not accepted.
-CMAKE_VERSION=$(drun cmake --version)
-if [[ "$EXECUTORCH" != *yes* && "$CMAKE_VERSION" != *"cmake version 4."* ]]; then
-  echo "CMake version is not 4.0.0:"
-  drun cmake --version
-  exit 1
-fi
--- a/.ci/docker/centos-rocm/Dockerfile
+++ b/.ci/docker/centos-rocm/Dockerfile
@ -1,109 +0,0 @@
-ARG CENTOS_VERSION
-
-FROM centos:${CENTOS_VERSION}
-
-ARG CENTOS_VERSION
-
-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
-# Install required packages to build Caffe2
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Update CentOS git version
-RUN yum -y remove git
-RUN yum -y remove git-*
-RUN yum -y install https://packages.endpointdev.com/rhel/7/os/x86_64/endpoint-repo-1.9-1.x86_64.rpm && \
-    sed -i 's/packages.endpoint/packages.endpointdev/' /etc/yum.repos.d/endpoint.repo
-RUN yum install -y git
-
-# Install devtoolset
-ARG DEVTOOLSET_VERSION
-COPY ./common/install_devtoolset.sh install_devtoolset.sh
-RUN bash ./install_devtoolset.sh && rm install_devtoolset.sh
-ENV BASH_ENV "/etc/profile"
-
-# (optional) Install non-default glibc version
-ARG GLIBC_VERSION
-COPY ./common/install_glibc.sh install_glibc.sh
-RUN if [ -n "${GLIBC_VERSION}" ]; then bash ./install_glibc.sh; fi
-RUN rm install_glibc.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG ANACONDA_PYTHON_VERSION
-ARG BUILD_ENVIRONMENT
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
-ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
-COPY requirements-ci.txt /opt/conda/requirements-ci.txt
-COPY ./common/install_conda.sh install_conda.sh
-COPY ./common/common_utils.sh common_utils.sh
-RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
-
-# (optional) Install vision packages like OpenCV
-ARG VISION
-COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
-RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
-RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
-ENV INSTALLED_VISION ${VISION}
-
-# Install rocm
-ARG ROCM_VERSION
-RUN mkdir ci_commit_pins
-COPY ./common/common_utils.sh common_utils.sh
-COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
-COPY ./common/install_rocm.sh install_rocm.sh
-RUN bash ./install_rocm.sh
-RUN rm install_rocm.sh common_utils.sh
-RUN rm -r ci_commit_pins
-COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
-RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
-RUN rm install_rocm_magma.sh
-COPY ./common/install_amdsmi.sh install_amdsmi.sh
-RUN bash ./install_amdsmi.sh
-RUN rm install_amdsmi.sh
-ENV PATH /opt/rocm/bin:$PATH
-ENV PATH /opt/rocm/hcc/bin:$PATH
-ENV PATH /opt/rocm/hip/bin:$PATH
-ENV PATH /opt/rocm/opencl/bin:$PATH
-ENV PATH /opt/rocm/llvm/bin:$PATH
-ENV MAGMA_HOME /opt/rocm/magma
-ENV LANG en_US.utf8
-ENV LC_ALL en_US.utf8
-
-# (optional) Install non-default Ninja version
-ARG NINJA_VERSION
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
-ARG TRITON
-# Install triton, this needs to be done before sccache because the latter will
-# try to reach out to S3, which docker build runners don't have access
-ENV CMAKE_C_COMPILER cc
-ENV CMAKE_CXX_COMPILER c++
-COPY ./common/install_triton.sh install_triton.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
-COPY triton_version.txt triton_version.txt
-RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
-
-# Install ccache/sccache (do this last, so we get priority in PATH)
-COPY ./common/install_cache.sh install_cache.sh
-ENV PATH /opt/cache/bin:$PATH
-RUN bash ./install_cache.sh && rm install_cache.sh
-
-# Include BUILD_ENVIRONMENT environment variable in image
-ARG BUILD_ENVIRONMENT
-ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/ci_commit_pins/executorch.txt
+++ b/.ci/docker/ci_commit_pins/executorch.txt
@ -1 +0,0 @@
-e0dda9059d082537cee36be6c5e4fe3b18c880c0
--- a/.ci/docker/ci_commit_pins/halide.txt
+++ b/.ci/docker/ci_commit_pins/halide.txt
@ -1 +0,0 @@
-461c12871f336fe6f57b55d6a297f13ef209161b
--- a/.ci/docker/ci_commit_pins/huggingface-requirements.txt
+++ b/.ci/docker/ci_commit_pins/huggingface-requirements.txt
@ -1,2 +0,0 @@
-transformers==4.56.0
-soxr==0.5.0
--- a/.ci/docker/ci_commit_pins/nccl-cu11.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu11.txt
@ -1 +0,0 @@
-v2.21.5-1
--- a/.ci/docker/ci_commit_pins/nccl-cu12.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu12.txt
@ -1 +0,0 @@
-v2.27.5-1
--- a/.ci/docker/ci_commit_pins/nccl-cu13.txt
+++ b/.ci/docker/ci_commit_pins/nccl-cu13.txt
@ -1 +0,0 @@
-v2.27.7-1
--- a/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
+++ b/.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
@ -1 +0,0 @@
-7fe50dc3da2069d6645d9deb8c017a876472a977
--- a/.ci/docker/ci_commit_pins/timm.txt
+++ b/.ci/docker/ci_commit_pins/timm.txt
@ -1 +0,0 @@
-5d535d7a2d4b435b1b5c1177fd8f04a12b942b9a
--- a/.ci/docker/ci_commit_pins/torchbench.txt
+++ b/.ci/docker/ci_commit_pins/torchbench.txt
@ -1 +0,0 @@
-74a23feff57432129df84d8099e622773cf77925
--- a/.ci/docker/ci_commit_pins/triton-cpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-cpu.txt
@ -1 +0,0 @@
-c7711371cace304afe265c1ffa906415ab82fc66
--- a/.ci/docker/ci_commit_pins/triton-xpu.txt
+++ b/.ci/docker/ci_commit_pins/triton-xpu.txt
@ -1 +0,0 @@
-1b0418a9a454b2b93ab8d71f40e59d2297157fae
--- a/.ci/docker/ci_commit_pins/triton.txt
+++ b/.ci/docker/ci_commit_pins/triton.txt
@ -1 +0,0 @@
-bbb06c0334a6772b92d24bde54956e675c8c6604
--- a/.ci/docker/common/cache_vision_models.sh
+++ b/.ci/docker/common/cache_vision_models.sh
@ -1,18 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Cache the test models at ~/.cache/torch/hub/
-IMPORT_SCRIPT_FILENAME="/tmp/torchvision_import_script.py"
-as_jenkins echo 'import torchvision; torchvision.models.mobilenet_v2(pretrained=True); torchvision.models.mobilenet_v3_large(pretrained=True);' > "${IMPORT_SCRIPT_FILENAME}"
-
-pip_install --pre torch torchvision --index-url https://download.pytorch.org/whl/nightly/cpu
-# Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
-# so echo the command to a file and run the file instead
-conda_run python "${IMPORT_SCRIPT_FILENAME}"
-
-# Cleaning up
-conda_run pip uninstall -y torch torchvision
-rm "${IMPORT_SCRIPT_FILENAME}" || true
--- a/.ci/docker/common/common_utils.sh
+++ b/.ci/docker/common/common_utils.sh
@ -1,40 +0,0 @@
-#!/bin/bash
-
-# Work around bug where devtoolset replaces sudo and breaks it.
-if [ -n "$DEVTOOLSET_VERSION" ]; then
-  export SUDO=/bin/sudo
-else
-  export SUDO=sudo
-fi
-
-as_jenkins() {
-  # NB: unsetting the environment variables works around a conda bug
-  # https://github.com/conda/conda/issues/6576
-  # NB: Pass on PATH and LD_LIBRARY_PATH to sudo invocation
-  # NB: This must be run from a directory that jenkins has access to,
-  # works around https://github.com/conda/conda-package-handling/pull/34
-  $SUDO -E -H -u jenkins env -u SUDO_UID -u SUDO_GID -u SUDO_COMMAND -u SUDO_USER env "PATH=$PATH" "LD_LIBRARY_PATH=$LD_LIBRARY_PATH" $*
-}
-
-conda_install() {
-  # Ensure that the install command don't upgrade/downgrade Python
-  # This should be called as
-  #   conda_install pkg1 pkg2 ... [-c channel]
-  as_jenkins conda install -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" $*
-}
-
-conda_install_through_forge() {
-  as_jenkins conda install -c conda-forge -q -n py_$ANACONDA_PYTHON_VERSION -y python="$ANACONDA_PYTHON_VERSION" $*
-}
-
-conda_run() {
-  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION --no-capture-output $*
-}
-
-pip_install() {
-  as_jenkins conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $*
-}
-
-get_pinned_commit() {
-  cat "${1}".txt
-}
--- a/.ci/docker/common/install_acl.sh
+++ b/.ci/docker/common/install_acl.sh
@ -1,16 +0,0 @@
-set -euo pipefail
-
-readonly version=v25.02
-readonly src_host=https://github.com/ARM-software
-readonly src_repo=ComputeLibrary
-
-# Clone ACL
-[[ ! -d ${src_repo} ]] && git clone ${src_host}/${src_repo}.git
-cd ${src_repo}
-
-git checkout $version
-
-# Build with scons
-scons -j8  Werror=0 debug=0 neon=1 opencl=0 embed_kernels=0 \
-  os=linux arch=armv8a build=native multi_isa=1 \
-  fixed_format_kernels=1 openmp=1 cppthreads=0
--- a/.ci/docker/common/install_amdsmi.sh
+++ b/.ci/docker/common/install_amdsmi.sh
@ -1,5 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-cd /opt/rocm/share/amd_smi && pip install .
--- a/.ci/docker/common/install_cache.sh
+++ b/.ci/docker/common/install_cache.sh
@ -1,142 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-install_ubuntu() {
-  echo "Preparing to build sccache from source"
-  apt-get update
-  # libssl-dev will not work as it is upgraded to libssl3 in Ubuntu-22.04.
-  # Instead use lib and headers from OpenSSL1.1 installed in `install_openssl.sh``
-  apt-get install -y cargo
-  echo "Checking out sccache repo"
-  git clone https://github.com/mozilla/sccache -b v0.10.0
-  cd sccache
-  echo "Building sccache"
-  cargo build --release
-  cp target/release/sccache /opt/cache/bin
-  echo "Cleaning up"
-  cd ..
-  rm -rf sccache
-  apt-get remove -y cargo rustc
-  apt-get autoclean && apt-get clean
-
-  echo "Downloading old sccache binary from S3 repo for PCH builds"
-  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache-0.2.14a
-  chmod 755 /opt/cache/bin/sccache-0.2.14a
-}
-
-install_binary() {
-  echo "Downloading sccache binary from S3 repo"
-  curl --retry 3 https://s3.amazonaws.com/ossci-linux/sccache -o /opt/cache/bin/sccache
-}
-
-mkdir -p /opt/cache/bin
-mkdir -p /opt/cache/lib
-sed -e 's|PATH="\(.*\)"|PATH="/opt/cache/bin:\1"|g' -i /etc/environment
-export PATH="/opt/cache/bin:$PATH"
-
-# Setup compiler cache
-install_ubuntu
-chmod a+x /opt/cache/bin/sccache
-
-function write_sccache_stub() {
-  # Unset LD_PRELOAD for ps because of asan + ps issues
-  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90589
-  if [ $1 == "gcc" ]; then
-    # Do not call sccache recursively when dumping preprocessor argument
-    # For some reason it's very important for the first cached nvcc invocation
-    cat >"/opt/cache/bin/$1" <<EOF
-#!/bin/sh
-
-# sccache does not support -E flag, so we need to call the original compiler directly in order to avoid calling this wrapper recursively
-for arg in "\$@"; do
-  if [ "\$arg" = "-E" ]; then
-    exec $(which $1) "\$@"
-  fi
-done
-
-if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
-  exec sccache $(which $1) "\$@"
-else
-  exec $(which $1) "\$@"
-fi
-EOF
-  else
-    cat >"/opt/cache/bin/$1" <<EOF
-#!/bin/sh
-
-if [ \$(env -u LD_PRELOAD ps -p \$PPID -o comm=) != sccache ]; then
-  exec sccache $(which $1) "\$@"
-else
-  exec $(which $1) "\$@"
-fi
-EOF
-  fi
-  chmod a+x "/opt/cache/bin/$1"
-}
-
-write_sccache_stub cc
-write_sccache_stub c++
-write_sccache_stub gcc
-write_sccache_stub g++
-
-# NOTE: See specific ROCM_VERSION case below.
-if [ "x$ROCM_VERSION" = x ]; then
-  write_sccache_stub clang
-  write_sccache_stub clang++
-fi
-
-if [ -n "$CUDA_VERSION" ]; then
-  # TODO: This is a workaround for the fact that PyTorch's FindCUDA
-  # implementation cannot find nvcc if it is setup this way, because it
-  # appears to search for the nvcc in PATH, and use its path to infer
-  # where CUDA is installed.  Instead, we install an nvcc symlink outside
-  # of the PATH, and set CUDA_NVCC_EXECUTABLE so that we make use of it.
-
-  write_sccache_stub nvcc
-  mv /opt/cache/bin/nvcc /opt/cache/lib/
-fi
-
-if [ -n "$ROCM_VERSION" ]; then
-  # ROCm compiler is hcc or clang. However, it is commonly invoked via hipcc wrapper.
-  # hipcc will call either hcc or clang using an absolute path starting with /opt/rocm,
-  # causing the /opt/cache/bin to be skipped. We must create the sccache wrappers
-  # directly under /opt/rocm while also preserving the original compiler names.
-  # Note symlinks will chain as follows: [hcc or clang++] -> clang -> clang-??
-  # Final link in symlink chain must point back to original directory.
-
-  # Original compiler is moved one directory deeper. Wrapper replaces it.
-  function write_sccache_stub_rocm() {
-    OLDCOMP=$1
-    COMPNAME=$(basename $OLDCOMP)
-    TOPDIR=$(dirname $OLDCOMP)
-    WRAPPED="$TOPDIR/original/$COMPNAME"
-    mv "$OLDCOMP" "$WRAPPED"
-    printf "#!/bin/sh\nexec sccache $WRAPPED \"\$@\"" >"$OLDCOMP"
-    chmod a+x "$OLDCOMP"
-  }
-
-  if [[ -e "/opt/rocm/hcc/bin/hcc" ]]; then
-    # ROCm 3.3 or earlier.
-    mkdir /opt/rocm/hcc/bin/original
-    write_sccache_stub_rocm /opt/rocm/hcc/bin/hcc
-    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang
-    write_sccache_stub_rocm /opt/rocm/hcc/bin/clang++
-    # Fix last link in symlink chain, clang points to versioned clang in prior dir
-    pushd /opt/rocm/hcc/bin/original
-    ln -s ../$(readlink clang)
-    popd
-  elif [[ -e "/opt/rocm/llvm/bin/clang" ]]; then
-    # ROCm 3.5 and beyond.
-    mkdir /opt/rocm/llvm/bin/original
-    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang
-    write_sccache_stub_rocm /opt/rocm/llvm/bin/clang++
-    # Fix last link in symlink chain, clang points to versioned clang in prior dir
-    pushd /opt/rocm/llvm/bin/original
-    ln -s ../$(readlink clang)
-    popd
-  else
-    echo "Cannot find ROCm compiler."
-    exit 1
-  fi
-fi
--- a/.ci/docker/common/install_clang.sh
+++ b/.ci/docker/common/install_clang.sh
@ -1,45 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-if [ -n "$CLANG_VERSION" ]; then
-
-  if [[ $UBUNTU_VERSION == 22.04 ]]; then
-    # work around ubuntu apt-get conflicts
-    sudo apt-get -y -f install
-    wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -
-    if [[ $CLANG_VERSION == 18 ]]; then
-      apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-18 main"
-    fi
-  fi
-
-  sudo apt-get update
-  if [[ $CLANG_VERSION -ge 18 ]]; then
-    apt-get install -y libomp-${CLANG_VERSION}-dev libclang-rt-${CLANG_VERSION}-dev clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
-  else
-    apt-get install -y --no-install-recommends clang-"$CLANG_VERSION" llvm-"$CLANG_VERSION"
-  fi
-
-  # Install dev version of LLVM.
-  if [ -n "$LLVMDEV" ]; then
-    sudo apt-get install -y --no-install-recommends llvm-"$CLANG_VERSION"-dev
-  fi
-
-  # Use update-alternatives to make this version the default
-  update-alternatives --install /usr/bin/clang clang /usr/bin/clang-"$CLANG_VERSION" 50
-  update-alternatives --install /usr/bin/clang++ clang++ /usr/bin/clang++-"$CLANG_VERSION" 50
-  # Override cc/c++ to clang as well
-  update-alternatives --install /usr/bin/cc cc /usr/bin/clang 50
-  update-alternatives --install /usr/bin/c++ c++ /usr/bin/clang++ 50
-
-  # clang's packaging is a little messed up (the runtime libs aren't
-  # added into the linker path), so give it a little help
-  clang_lib=("/usr/lib/llvm-$CLANG_VERSION/lib/clang/"*"/lib/linux")
-  echo "$clang_lib" >/etc/ld.so.conf.d/clang.conf
-  ldconfig
-
-  # Cleanup package manager
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-fi
--- a/.ci/docker/common/install_conda.sh
+++ b/.ci/docker/common/install_conda.sh
@ -1,101 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Optionally install conda
-if [ -n "$ANACONDA_PYTHON_VERSION" ]; then
-  BASE_URL="https://github.com/conda-forge/miniforge/releases/latest/download"  # @lint-ignore
-  CONDA_FILE="Miniforge3-Linux-$(uname -m).sh"
-
-  MAJOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 1)
-  MINOR_PYTHON_VERSION=$(echo "$ANACONDA_PYTHON_VERSION" | cut -d . -f 2)
-
-  case "$MAJOR_PYTHON_VERSION" in
-    3);;
-    *)
-      echo "Unsupported ANACONDA_PYTHON_VERSION: $ANACONDA_PYTHON_VERSION"
-      exit 1
-      ;;
-  esac
-  mkdir -p /opt/conda
-  chown jenkins:jenkins /opt/conda
-
-  SCRIPT_FOLDER="$( cd "$(dirname "$0")" ; pwd -P )"
-  source "${SCRIPT_FOLDER}/common_utils.sh"
-
-  pushd /tmp
-  wget -q "${BASE_URL}/${CONDA_FILE}"
-  # NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
-  as_jenkins bash "${CONDA_FILE}" -b -f -p "/opt/conda"
-  popd
-
-  # NB: Don't do this, rely on the rpath to get it right
-  #echo "/opt/conda/lib" > /etc/ld.so.conf.d/conda-python.conf
-  #ldconfig
-  sed -e 's|PATH="\(.*\)"|PATH="/opt/conda/bin:\1"|g' -i /etc/environment
-  export PATH="/opt/conda/bin:$PATH"
-
-  # Ensure we run conda in a directory that jenkins has write access to
-  pushd /opt/conda
-
-  # Prevent conda from updating to 4.14.0, which causes docker build failures
-  # See https://hud.pytorch.org/pytorch/pytorch/commit/754d7f05b6841e555cea5a4b2c505dd9e0baec1d
-  # Uncomment the below when resolved to track the latest conda update
-  # as_jenkins conda update -y -n base conda
-
-  if [[ $(uname -m) == "aarch64" ]]; then
-    export SYSROOT_DEP="sysroot_linux-aarch64=2.17"
-  else
-    export SYSROOT_DEP="sysroot_linux-64=2.17"
-  fi
-
-  # Install correct Python version
-  # Also ensure sysroot is using a modern GLIBC to match system compilers
-  as_jenkins conda create -n py_$ANACONDA_PYTHON_VERSION -y\
-             python="$ANACONDA_PYTHON_VERSION" \
-             ${SYSROOT_DEP}
-
-  # libstdcxx from conda default channels are too old, we need GLIBCXX_3.4.30
-  # which is provided in libstdcxx 12 and up.
-  conda_install libstdcxx-ng=12.3.0 --update-deps -c conda-forge
-
-  # Miniforge installer doesn't install sqlite by default
-  if [[ "$BUILD_ENVIRONMENT" == *rocm* ]]; then
-    conda_install sqlite
-  fi
-
-  # Install PyTorch conda deps, as per https://github.com/pytorch/pytorch README
-  if [[ $(uname -m) != "aarch64" ]]; then
-    pip_install mkl==2024.2.0
-    pip_install mkl-static==2024.2.0
-    pip_install mkl-include==2024.2.0
-  fi
-
-  # Install llvm-8 as it is required to compile llvmlite-0.30.0 from source
-  # and libpython-static for torch deploy
-  conda_install llvmdev=8.0.0 "libpython-static=${ANACONDA_PYTHON_VERSION}"
-
-  # Magma package names are concatenation of CUDA major and minor ignoring revision
-  # I.e. magma-cuda102 package corresponds to CUDA_VERSION=10.2 and CUDA_VERSION=10.2.89
-  # Magma is installed from a tarball in the ossci-linux bucket into the conda env
-  if [ -n "$CUDA_VERSION" ]; then
-    conda_run ${SCRIPT_FOLDER}/install_magma_conda.sh $(cut -f1-2 -d'.' <<< ${CUDA_VERSION})
-  fi
-
-  if [[ "$UBUNTU_VERSION" == "24.04"* ]] ; then
-    conda_install_through_forge libstdcxx-ng=14
-  fi
-
-  # Install some other packages, including those needed for Python test reporting
-  pip_install -r /opt/conda/requirements-ci.txt
-
-  if [ -n "$DOCS" ]; then
-    apt-get update
-    apt-get -y install expect-dev
-
-    # We are currently building docs with python 3.8 (min support version)
-    pip_install -r /opt/conda/requirements-docs.txt
-  fi
-
-  popd
-fi
--- a/.ci/docker/common/install_conda_docker.sh
+++ b/.ci/docker/common/install_conda_docker.sh
@ -1,20 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-set -ex
-
-# Anaconda
-# Latest anaconda is using openssl-3 which is incompatible with all currently published versions of git
-# Which are using openssl-1.1.1, see https://anaconda.org/anaconda/git/files?version=2.40.1 for example
-MINICONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_23.5.2-0-Linux-x86_64.sh
-wget -q $MINICONDA_URL
-# NB: Manually invoke bash per https://github.com/conda/conda/issues/10431
-bash $(basename "$MINICONDA_URL") -b -p /opt/conda
-rm $(basename "$MINICONDA_URL")
-export PATH=/opt/conda/bin:$PATH
-# See https://github.com/pytorch/builder/issues/1473
-# Pin conda to 23.5.2 as it's the last one compatible with openssl-1.1.1
-conda install -y conda=23.5.2 conda-build anaconda-client git ninja
-# The cmake version here needs to match with the minimum version of cmake
-# supported by PyTorch (3.18). There is only 3.18.2 on anaconda
-/opt/conda/bin/pip3 install cmake==3.18.2
-conda remove -y --force patchelf
--- a/.ci/docker/common/install_cpython.sh
+++ b/.ci/docker/common/install_cpython.sh
@ -1,107 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-set -uex -o pipefail
-
-PYTHON_DOWNLOAD_URL=https://www.python.org/ftp/python
-GET_PIP_URL=https://bootstrap.pypa.io/get-pip.py
-
-# Python versions to be installed in /opt/$VERSION_NO
-CPYTHON_VERSIONS=${CPYTHON_VERSIONS:-"3.9.0 3.10.1 3.11.0 3.12.0 3.13.0 3.13.0t 3.14.0 3.14.0t"}
-
-function check_var {
-    if [ -z "$1" ]; then
-        echo "required variable not defined"
-        exit 1
-    fi
-}
-
-function do_cpython_build {
-    local py_ver=$1
-    local py_folder=$2
-    check_var $py_ver
-    check_var $py_folder
-    tar -xzf Python-$py_ver.tgz
-
-    local additional_flags=""
-    if [[ "$py_ver" == *"t" ]]; then
-        additional_flags=" --disable-gil"
-    fi
-
-    pushd $py_folder
-
-    local prefix="/opt/_internal/cpython-${py_ver}"
-    mkdir -p ${prefix}/lib
-    if [[ -n $(which patchelf) ]]; then
-        local shared_flags="--enable-shared"
-    else
-        local shared_flags="--disable-shared"
-    fi
-    if [[ -z  "${WITH_OPENSSL+x}" ]]; then
-        local openssl_flags=""
-    else
-        local openssl_flags="--with-openssl=${WITH_OPENSSL} --with-openssl-rpath=auto"
-    fi
-
-
-
-    # -Wformat added for https://bugs.python.org/issue17547 on Python 2.6
-    CFLAGS="-Wformat" ./configure --prefix=${prefix} ${openssl_flags} ${shared_flags} ${additional_flags} > /dev/null
-
-    make -j40 > /dev/null
-    make install > /dev/null
-
-    if [[ "${shared_flags}" == "--enable-shared" ]]; then
-        patchelf --set-rpath '$ORIGIN/../lib' ${prefix}/bin/python3
-    fi
-
-    popd
-    rm -rf $py_folder
-    # Some python's install as bin/python3. Make them available as
-    # bin/python.
-    if [ -e ${prefix}/bin/python3 ]; then
-        ln -s python3 ${prefix}/bin/python
-    fi
-    ${prefix}/bin/python get-pip.py
-    if [ -e ${prefix}/bin/pip3 ] && [ ! -e ${prefix}/bin/pip ]; then
-        ln -s pip3 ${prefix}/bin/pip
-    fi
-    # install setuptools since python 3.12 is required to use distutils
-    # packaging is needed to create symlink since wheel no longer provides needed information
-    ${prefix}/bin/pip install packaging==25.0 wheel==0.45.1 setuptools==80.9.0
-    local abi_tag=$(${prefix}/bin/python -c "from packaging.tags import interpreter_name, interpreter_version; import sysconfig ; from sysconfig import get_config_var; print('{0}{1}-{0}{1}{2}'.format(interpreter_name(), interpreter_version(), 't' if sysconfig.get_config_var('Py_GIL_DISABLED') else ''))")
-    ln -sf ${prefix} /opt/python/${abi_tag}
-}
-
-function build_cpython {
-    local py_ver=$1
-    check_var $py_ver
-    local py_suffix=$py_ver
-    local py_folder=$py_ver
-
-    # Special handling for nogil
-    if [[ "${py_ver}" == *"t" ]]; then
-        py_suffix=${py_ver::-1}
-        py_folder=$py_suffix
-    fi
-    # Update to rc2 due to https://github.com/python/cpython/commit/c72699086fe4
-    if [ "$py_suffix" == "3.14.0" ]; then
-        py_suffix="3.14.0rc2"
-    fi
-    wget -q $PYTHON_DOWNLOAD_URL/$py_folder/Python-$py_suffix.tgz -O Python-$py_ver.tgz
-    do_cpython_build $py_ver Python-$py_suffix
-
-    rm -f Python-$py_ver.tgz
-}
-
-function build_cpythons {
-    check_var $GET_PIP_URL
-    curl -sLO $GET_PIP_URL
-    for py_ver in $@; do
-        build_cpython $py_ver
-    done
-    rm -f get-pip.py
-}
-
-mkdir -p /opt/python
-mkdir -p /opt/_internal
-build_cpythons $CPYTHON_VERSIONS
--- a/.ci/docker/common/install_cuda.sh
+++ b/.ci/docker/common/install_cuda.sh
@ -1,185 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-arch_path=''
-targetarch=${TARGETARCH:-$(uname -m)}
-if [ ${targetarch} = 'amd64' ] || [ "${targetarch}" = 'x86_64' ]; then
-  arch_path='x86_64'
-else
-  arch_path='sbsa'
-fi
-
-NVSHMEM_VERSION=3.3.24
-
-function install_cuda {
-  version=$1
-  runfile=$2
-  major_minor=${version%.*}
-  rm -rf /usr/local/cuda-${major_minor} /usr/local/cuda
-  if [[ ${arch_path} == 'sbsa' ]]; then
-      runfile="${runfile}_sbsa"
-  fi
-  runfile="${runfile}.run"
-  wget -q https://developer.download.nvidia.com/compute/cuda/${version}/local_installers/${runfile} -O ${runfile}
-  chmod +x ${runfile}
-  ./${runfile} --toolkit --silent
-  rm -f ${runfile}
-  rm -f /usr/local/cuda && ln -s /usr/local/cuda-${major_minor} /usr/local/cuda
-}
-
-function install_cudnn {
-  cuda_major_version=$1
-  cudnn_version=$2
-  mkdir tmp_cudnn && cd tmp_cudnn
-  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-  filepath="cudnn-linux-${arch_path}-${cudnn_version}_cuda${cuda_major_version}-archive"
-  wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-${arch_path}/${filepath}.tar.xz
-  tar xf ${filepath}.tar.xz
-  cp -a ${filepath}/include/* /usr/local/cuda/include/
-  cp -a ${filepath}/lib/* /usr/local/cuda/lib64/
-  cd ..
-  rm -rf tmp_cudnn
-}
-
-function install_nvshmem {
-  cuda_major_version=$1      # e.g. "12"
-  nvshmem_version=$2         # e.g. "3.3.9"
-
-  case "${arch_path}" in
-    sbsa)
-      dl_arch="aarch64"
-      ;;
-    x86_64)
-      dl_arch="x64"
-      ;;
-    *)
-      dl_arch="${arch}"
-      ;;
-  esac
-
-  tmpdir="tmp_nvshmem"
-  mkdir -p "${tmpdir}" && cd "${tmpdir}"
-
-  # nvSHMEM license: https://docs.nvidia.com/nvshmem/api/sla.html
-  # This pattern is a lie as it is not consistent across versions, for 3.3.9 it was cuda_ver-arch-nvshhem-ver
-  filename="libnvshmem-linux-${arch_path}-${nvshmem_version}_cuda${cuda_major_version}-archive"
-  suffix=".tar.xz"
-  url="https://developer.download.nvidia.com/compute/nvshmem/redist/libnvshmem/linux-${arch_path}/${filename}${suffix}"
-
-  # download, unpack, install
-  wget -q "${url}"
-  tar xf "${filename}${suffix}"
-  cp -a "${filename}/include/"* /usr/local/cuda/include/
-  cp -a "${filename}/lib/"*     /usr/local/cuda/lib64/
-
-  # cleanup
-  cd ..
-  rm -rf "${tmpdir}"
-
-  echo "nvSHMEM ${nvshmem_version} for CUDA ${cuda_major_version} (${arch_path}) installed."
-}
-
-function install_124 {
-  CUDNN_VERSION=9.1.0.70
-  echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.2"
-  install_cuda 12.4.1 cuda_12.4.1_550.54.15_linux
-
-  install_cudnn 12 $CUDNN_VERSION
-
-  CUDA_VERSION=12.4 bash install_nccl.sh
-
-  CUDA_VERSION=12.4 bash install_cusparselt.sh
-
-  ldconfig
-}
-
-function install_126 {
-  CUDNN_VERSION=9.10.2.21
-  echo "Installing CUDA 12.6.3 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
-  install_cuda 12.6.3 cuda_12.6.3_560.35.05_linux
-
-  install_cudnn 12 $CUDNN_VERSION
-
-  install_nvshmem 12 $NVSHMEM_VERSION
-
-  CUDA_VERSION=12.6 bash install_nccl.sh
-
-  CUDA_VERSION=12.6 bash install_cusparselt.sh
-
-  ldconfig
-}
-
-function install_129 {
-  CUDNN_VERSION=9.10.2.21
-  echo "Installing CUDA 12.9.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
-  # install CUDA 12.9.1 in the same container
-  install_cuda 12.9.1 cuda_12.9.1_575.57.08_linux
-
-  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-  install_cudnn 12 $CUDNN_VERSION
-
-  install_nvshmem 12 $NVSHMEM_VERSION
-
-  CUDA_VERSION=12.9 bash install_nccl.sh
-
-  CUDA_VERSION=12.9 bash install_cusparselt.sh
-
-  ldconfig
-}
-
-function install_128 {
-  CUDNN_VERSION=9.8.0.87
-  echo "Installing CUDA 12.8.1 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
-  # install CUDA 12.8.1 in the same container
-  install_cuda 12.8.1 cuda_12.8.1_570.124.06_linux
-
-  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-  install_cudnn 12 $CUDNN_VERSION
-
-  install_nvshmem 12 $NVSHMEM_VERSION
-
-  CUDA_VERSION=12.8 bash install_nccl.sh
-
-  CUDA_VERSION=12.8 bash install_cusparselt.sh
-
-  ldconfig
-}
-
-function install_130 {
-  CUDNN_VERSION=9.13.0.50
-  echo "Installing CUDA 13.0 and cuDNN ${CUDNN_VERSION} and NVSHMEM and NCCL and cuSparseLt-0.7.1"
-  # install CUDA 13.0 in the same container
-  install_cuda 13.0.0 cuda_13.0.0_580.65.06_linux
-
-  # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
-  install_cudnn 13 $CUDNN_VERSION
-
-  install_nvshmem 13 $NVSHMEM_VERSION
-
-  CUDA_VERSION=13.0 bash install_nccl.sh
-
-  CUDA_VERSION=13.0 bash install_cusparselt.sh
-
-  ldconfig
-}
-
-# idiomatic parameter and option handling in sh
-while test $# -gt 0
-do
-    case "$1" in
-    12.4) install_124;
-        ;;
-    12.6|12.6.*) install_126;
-        ;;
-    12.8|12.8.*) install_128;
-        ;;
-    12.9|12.9.*) install_129;
-        ;;
-    13.0|13.0.*) install_130;
-        ;;
-    *) echo "bad argument $1"; exit 1
-        ;;
-    esac
-    shift
-done
--- a/.ci/docker/common/install_cudss.sh
+++ b/.ci/docker/common/install_cudss.sh
@ -1,25 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# cudss license: https://docs.nvidia.com/cuda/cudss/license.html
-mkdir tmp_cudss && cd tmp_cudss
-
-if [[ ${CUDA_VERSION:0:4} =~ ^12\.[1-4]$ ]]; then
-    arch_path='sbsa'
-    export TARGETARCH=${TARGETARCH:-$(uname -m)}
-    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
-        arch_path='x86_64'
-    fi
-    CUDSS_NAME="libcudss-linux-${arch_path}-0.3.0.9_cuda12-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cudss/redist/libcudss/linux-${arch_path}/${CUDSS_NAME}.tar.xz
-
-    # only for cuda 12
-    tar xf ${CUDSS_NAME}.tar.xz
-    cp -a ${CUDSS_NAME}/include/* /usr/local/cuda/include/
-    cp -a ${CUDSS_NAME}/lib/* /usr/local/cuda/lib64/
-fi
-
-cd ..
-rm -rf tmp_cudss
-ldconfig
--- a/.ci/docker/common/install_cusparselt.sh
+++ b/.ci/docker/common/install_cusparselt.sh
@ -1,41 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
-mkdir tmp_cusparselt && cd tmp_cusparselt
-
-if [[ ${CUDA_VERSION:0:4} =~ "13" ]]; then
-    arch_path='sbsa'
-    export TARGETARCH=${TARGETARCH:-$(uname -m)}
-    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
-        arch_path='x86_64'
-    fi
-    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.8.0.4_cuda13-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
-elif [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-9]$ ]]; then
-    arch_path='sbsa'
-    export TARGETARCH=${TARGETARCH:-$(uname -m)}
-    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
-        arch_path='x86_64'
-    fi
-    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.7.1.0-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
-elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
-    arch_path='sbsa'
-    export TARGETARCH=${TARGETARCH:-$(uname -m)}
-    if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
-        arch_path='x86_64'
-    fi
-    CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive"
-    curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
-else
-    echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}"
-fi
-
-tar xf ${CUSPARSELT_NAME}.tar.xz
-cp -a ${CUSPARSELT_NAME}/include/* /usr/local/cuda/include/
-cp -a ${CUSPARSELT_NAME}/lib/* /usr/local/cuda/lib64/
-cd ..
-rm -rf tmp_cusparselt
-ldconfig
--- a/.ci/docker/common/install_docs_reqs.sh
+++ b/.ci/docker/common/install_docs_reqs.sh
@ -1,25 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Install the documentation toolchain (Node.js, yarn, KaTeX, doxygen).
-# Everything is skipped unless the KATEX environment variable is non-empty.
-if [ -n "$KATEX" ]; then
-  apt-get update
-  # Ignore error if gpg-agent doesn't exist (for Ubuntu 16.04)
-  apt-get install -y gpg-agent || :
-
-  # Run the NodeSource 16.x setup script, then install Node.js.
-  curl --retry 3 -sL https://deb.nodesource.com/setup_16.x | sudo -E bash -
-  sudo apt-get install -y nodejs
-
-  # Add the yarn signing key and register the yarn apt repository.
-  curl --retry 3 -sS https://dl.yarnpkg.com/debian/pubkey.gpg | sudo apt-key add -
-  echo "deb https://dl.yarnpkg.com/debian/ stable main" | sudo tee /etc/apt/sources.list.d/yarn.list
-
-  # Refresh the package lists so the yarn repository added above is visible.
-  apt-get update
-  apt-get install -y --no-install-recommends yarn
-  yarn global add katex --prefix /usr/local
-
-  sudo apt-get -y install doxygen
-
-  # Drop apt caches and temporary files.
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-fi
--- a/.ci/docker/common/install_executorch.sh
+++ b/.ci/docker/common/install_executorch.sh
@ -1,68 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Clone ExecuTorch, check out the pinned commit together with its submodules,
-# and hand ownership of the checkout to the jenkins user.
-clone_executorch() {
-  # get_pinned_commit is not defined in this script; presumably it is provided
-  # by the sourced common_utils.sh.
-  EXECUTORCH_PINNED_COMMIT=$(get_pinned_commit executorch)
-
-  # Clone the Executorch
-  git clone https://github.com/pytorch/executorch.git
-
-  # and fetch the target commit
-  pushd executorch
-  git checkout "${EXECUTORCH_PINNED_COMMIT}"
-  git submodule update --init --recursive
-  popd
-
-  chown -R jenkins executorch
-}
-
-# Run ExecuTorch's own buck installer from inside its .ci/docker directory.
-# Expects the executorch checkout to exist in the current directory.
-install_buck2() {
-  pushd executorch/.ci/docker
-
-  # BUCK2_VERSION is not used again in this script; presumably the sourced
-  # install_buck.sh reads it -- TODO confirm.
-  BUCK2_VERSION=$(cat ci_commit_pins/buck2.txt)
-  source common/install_buck.sh
-
-  popd
-}
-
-# Install the conda packages listed in ExecuTorch's conda-env-ci.txt.
-# conda_install is not defined here; presumably it comes from common_utils.sh.
-install_conda_dependencies() {
-  pushd executorch/.ci/docker
-  # Install conda dependencies like flatbuffer
-  conda_install --file conda-env-ci.txt
-  popd
-}
-
-# Run ExecuTorch's install_executorch.sh through as_jenkins, then adjust the
-# Python environment: remove numba and scipy and add yaspin.
-# as_jenkins, conda_run and pip_install are not defined in this script
-# (presumably provided by common_utils.sh).
-install_pip_dependencies() {
-  pushd executorch
-  as_jenkins bash install_executorch.sh
-
-  # A workaround, ExecuTorch has moved to numpy 2.0 which is not compatible with the current
-  # numba and scipy version used in PyTorch CI
-  conda_run pip uninstall -y numba scipy
-  # Yaspin is needed for running CI test (get_benchmark_analysis_data.py)
-  pip_install yaspin==3.1.0
-
-  popd
-}
-
-# Export the build configuration and run ExecuTorch's Linux setup script with
-# the cmake build tool. The script path is relative, so the caller must
-# already be inside the executorch checkout.
-setup_executorch() {
-  export PYTHON_EXECUTABLE=python
-  export CMAKE_ARGS="-DEXECUTORCH_BUILD_PYBIND=ON -DEXECUTORCH_BUILD_XNNPACK=ON -DEXECUTORCH_BUILD_KERNELS_QUANTIZED=ON -DEXECUTORCH_BUILD_TESTS=ON"
-
-  # `|| true` makes this step best-effort: a failing setup does not abort the
-  # script even though `set -e` is active.
-  as_jenkins .ci/scripts/setup-linux.sh --build-tool cmake || true
-}
-
-# Entry point. When arguments are given they are executed as a command (for
-# example the name of one of the functions defined in this script); with no
-# arguments the full installation runs in order.
-if [[ $# -gt 0 ]]; then
-  "$@"
-else
-  clone_executorch
-  install_buck2
-  install_conda_dependencies
-  install_pip_dependencies
-  pushd executorch
-  setup_executorch
-  popd
-fi
--- a/.ci/docker/common/install_gcc.sh
+++ b/.ci/docker/common/install_gcc.sh
@ -1,20 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Install g++-$GCC_VERSION and register gcc, g++ and gcov of that version with
-# update-alternatives. Nothing happens when GCC_VERSION is empty.
-if [ -n "$GCC_VERSION" ]; then
-
-  # Need the official toolchain repo to get alternate packages
-  add-apt-repository ppa:ubuntu-toolchain-r/test
-  apt-get update
-  apt-get install -y g++-$GCC_VERSION
-  # Register the versioned binaries under the unversioned names with priority 50.
-  update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-"$GCC_VERSION" 50
-  update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-"$GCC_VERSION" 50
-  update-alternatives --install /usr/bin/gcov gcov /usr/bin/gcov-"$GCC_VERSION" 50
-
-
-  # Cleanup package manager
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-fi
--- a/.ci/docker/common/install_halide.sh
+++ b/.ci/docker/common/install_halide.sh
@ -1,48 +0,0 @@
-#!/bin/bash
-set -ex
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Build LLVM (release/16.x) from source, then build Halide at the pinned
-# commit against it and install Halide into the active conda environment.
-
-# Under `set -e` each bare `test -n` below aborts the script when the value
-# it checks is empty.
-COMMIT=$(get_pinned_commit halide)
-test -n "$COMMIT"
-
-# activate conda to populate CONDA_PREFIX
-test -n "$ANACONDA_PYTHON_VERSION"
-eval "$(conda shell.bash hook)"
-conda activate py_$ANACONDA_PYTHON_VERSION
-
-# System build dependencies are installed only when UBUNTU_VERSION is set.
-if [ -n "${UBUNTU_VERSION}" ];then
-    apt update
-    apt-get install -y lld liblld-15-dev libpng-dev libjpeg-dev libgl-dev \
-                  libopenblas-dev libeigen3-dev libatlas-base-dev libzstd-dev
-fi
-
-pip_install numpy scipy imageio cmake ninja
-
-# Configure, build and install LLVM with clang for the X86 and NVPTX targets
-# into ./llvm-install.
-git clone --depth 1 --branch release/16.x --recursive https://github.com/llvm/llvm-project.git
-cmake -DCMAKE_BUILD_TYPE=Release \
-        -DLLVM_ENABLE_PROJECTS="clang" \
-        -DLLVM_TARGETS_TO_BUILD="X86;NVPTX" \
-        -DLLVM_ENABLE_TERMINFO=OFF -DLLVM_ENABLE_ASSERTIONS=ON \
-        -DLLVM_ENABLE_EH=ON -DLLVM_ENABLE_RTTI=ON -DLLVM_BUILD_32_BITS=OFF \
-        -S llvm-project/llvm -B llvm-build -G Ninja
-cmake --build llvm-build
-cmake --install llvm-build --prefix llvm-install
-# Exported for the Halide build below (presumably read by its CMake
-# configuration -- TODO confirm).
-export LLVM_ROOT=`pwd`/llvm-install
-export LLVM_CONFIG=$LLVM_ROOT/bin/llvm-config
-
-git clone https://github.com/halide/Halide.git
-pushd Halide
-git checkout ${COMMIT} && git submodule update --init --recursive
-pip_install -r requirements.txt
-# NOTE: pybind has a requirement for cmake > 3.5 so set the minimum cmake version here with a flag
-#       Context: https://github.com/pytorch/pytorch/issues/150420
-cmake -G Ninja -DCMAKE_POLICY_VERSION_MINIMUM=3.5 -DCMAKE_BUILD_TYPE=Release -S . -B build
-cmake --build build
-# Create a lib/python3 symlink to the versioned python directory if it does
-# not exist yet, before installing into the conda prefix.
-test -e ${CONDA_PREFIX}/lib/python3 || ln -s python${ANACONDA_PYTHON_VERSION} ${CONDA_PREFIX}/lib/python3
-cmake --install build --prefix ${CONDA_PREFIX}
-chown -R jenkins ${CONDA_PREFIX}
-popd
-# Remove all sources and build trees, including the LLVM install used for the build.
-rm -rf Halide llvm-build llvm-project llvm-install
-
-python -c "import halide"  # check for errors
--- a/.ci/docker/common/install_inductor_benchmark_deps.sh
+++ b/.ci/docker/common/install_inductor_benchmark_deps.sh
@ -1,46 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Install the packages listed in huggingface-requirements.txt (path relative
-# to the current directory).
-install_huggingface() {
-  pip_install -r huggingface-requirements.txt
-}
-
-# Install pytorch-image-models (timm) from GitHub at the pinned commit.
-install_timm() {
-  # Declared separately from the assignment so that a failing
-  # get_pinned_commit is not hidden by the exit status of `local`.
-  local timm_commit
-  timm_commit=$(get_pinned_commit timm)
-
-  pip_install "git+https://github.com/huggingface/pytorch-image-models@${timm_commit}"
-}
-
-# Clone TorchBench at the pinned commit, run its installer, print the
-# resulting pip package list, and give the checkout and /opt/conda to jenkins.
-function install_torchbench() {
-  local commit
-  commit=$(get_pinned_commit torchbench)
-  git clone https://github.com/pytorch/benchmark torchbench
-  pushd torchbench
-  git checkout "$commit"
-
-  # --continue_on_fail presumably lets the installer proceed past individual
-  # failures -- TODO confirm against TorchBench's install.py.
-  python install.py --continue_on_fail
-
-  echo "Print all dependencies after TorchBench is installed"
-  python -mpip freeze
-  popd
-
-  chown -R jenkins torchbench
-  chown -R jenkins /opt/conda
-}
-
-# Main sequence: install prerequisites, then the three benchmark suites, then
-# remove the temporary torch packages again.
-
-# Pango is needed for weasyprint which is needed for doctr
-conda_install pango
-
-# Stable packages are ok here, just to satisfy TorchBench check
-pip_install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128
-
-install_torchbench
-install_huggingface
-install_timm
-
-# Clean up
-# Besides the three packages installed above, triton and torchao are removed
-# as well (presumably pulled in as dependencies -- TODO confirm).
-conda_run pip uninstall -y torch torchvision torchaudio triton torchao
--- a/.ci/docker/common/install_libpng.sh
+++ b/.ci/docker/common/install_libpng.sh
@ -1,23 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-set -ex
-
-# Download the libpng source release, build it with the default configure
-# options, install it, and remove the build directory afterwards.
-LIBPNG_VERSION=1.6.37
-
-mkdir -p libpng
-pushd libpng
-
-# NOTE(review): the tarball is fetched over plain http and no checksum is
-# verified in this script.
-wget http://download.sourceforge.net/libpng/libpng-$LIBPNG_VERSION.tar.gz
-tar -xvzf libpng-$LIBPNG_VERSION.tar.gz
-
-pushd libpng-$LIBPNG_VERSION
-
-./configure
-make
-make install
-
-popd
-
-popd
-rm -rf libpng
--- a/.ci/docker/common/install_linter.sh
+++ b/.ci/docker/common/install_linter.sh
@ -1,27 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Install linter tooling and pre-initialise lintrunner so that its .lintbin
-# directory ends up in /tmp.
-
-# System packages are installed only when UBUNTU_VERSION is set.
-if [ -n "${UBUNTU_VERSION}" ]; then
-  apt update
-  apt-get install -y clang doxygen git graphviz nodejs npm libtinfo5
-fi
-
-# Do shallow clone of PyTorch so that we can init lintrunner in Docker build context
-git clone https://github.com/pytorch/pytorch.git --depth 1
-chown -R jenkins pytorch
-
-pushd pytorch
-# Install all linter dependencies
-pip install -r requirements.txt
-lintrunner init
-
-# Cache .lintbin directory as part of the Docker image
-cp -r .lintbin /tmp
-popd
-
-# Node dependencies required by toc linter job
-npm install -g markdown-toc
-
-# Cleaning up
-rm -rf pytorch
--- a/.ci/docker/common/install_magma.sh
+++ b/.ci/docker/common/install_magma.sh
@ -1,27 +0,0 @@
-#!/usr/bin/env bash
-# Script used only in CD pipeline
-
-set -eou pipefail
-
-# Download the prebuilt MAGMA tarball for the CUDA version given as $1 and
-# move its include/ and lib/ directories to /usr/local/cuda-<version>/magma.
-do_install() {
-    cuda_version=$1
-    cuda_version_nodot=${cuda_version/./}
-    cuda_dir="/usr/local/cuda-${cuda_version}"
-
-    MAGMA_VERSION="2.6.1"
-    magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
-
-    # The subshell keeps `set -x` and the directory change local to this step.
-    (
-        set -x
-        tmp_dir=$(mktemp -d)
-        pushd ${tmp_dir}
-        curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
-        tar -xvf "${magma_archive}"
-        mkdir -p "${cuda_dir}/magma"
-        mv include "${cuda_dir}/magma/include"
-        mv lib "${cuda_dir}/magma/lib"
-        popd
-    )
-}
-
-# The CUDA version is the first (and only) script argument.
-do_install $1
--- a/.ci/docker/common/install_magma_conda.sh
+++ b/.ci/docker/common/install_magma_conda.sh
@ -1,23 +0,0 @@
-#!/usr/bin/env bash
-# Script that installs magma from tarball inside conda environment.
-# It replaces anaconda magma-cuda package which is no longer published.
-# Execute it inside active conda environment.
-# See issue: https://github.com/pytorch/pytorch/issues/138506
-
-set -eou pipefail
-
-# $1 is the CUDA version; its first '.' is removed to form the archive name.
-cuda_version_nodot=${1/./}
-# Install into CONDA_PREFIX when it is set, otherwise into the parent of the
-# directory that contains the `conda` executable found on PATH.
-anaconda_dir=${CONDA_PREFIX:-"$(dirname $(which conda))/../"}
-
-MAGMA_VERSION="2.6.1"
-magma_archive="magma-cuda${cuda_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
-# Work in a temporary directory inside a subshell so that `set -x` and the
-# directory change do not affect the caller.
-(
-    set -x
-    tmp_dir=$(mktemp -d)
-    pushd ${tmp_dir}
-    curl -OLs https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}
-    tar -xvf "${magma_archive}"
-    # Move the contents of include/ and lib/ into the existing directories of
-    # the conda installation.
-    mv include/* "${anaconda_dir}/include/"
-    mv lib/* "${anaconda_dir}/lib"
-    popd
-)
--- a/.ci/docker/common/install_miopen.sh
+++ b/.ci/docker/common/install_miopen.sh
@ -1,129 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-set -ex
-
-# The ROCm version (major.minor or major.minor.patch) is the only argument.
-ROCM_VERSION=$1
-
-if [[ -z $ROCM_VERSION ]]; then
-    echo "missing ROCM_VERSION"
-    exit 1;
-fi
-
-# Detect the distribution from the ID field of /etc/os-release; IS_UBUNTU
-# selects between apt and yum commands further down.
-IS_UBUNTU=0
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-  ubuntu)
-    IS_UBUNTU=1
-    ;;
-  centos|almalinux)
-    IS_UBUNTU=0
-    ;;
-  *)
-    echo "Unable to determine OS..."
-    exit 1
-    ;;
-esac
-
-# To make version comparison easier, create an integer representation.
-# Both assignments on the IFS line are plain variable assignments, so IFS
-# stays changed afterwards; it is therefore saved and restored around it.
-save_IFS="$IFS"
-IFS=. ROCM_VERSION_ARRAY=(${ROCM_VERSION})
-IFS="$save_IFS"
-if [[ ${#ROCM_VERSION_ARRAY[@]} == 2 ]]; then
-    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
-    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
-    ROCM_VERSION_PATCH=0
-elif [[ ${#ROCM_VERSION_ARRAY[@]} == 3 ]]; then
-    ROCM_VERSION_MAJOR=${ROCM_VERSION_ARRAY[0]}
-    ROCM_VERSION_MINOR=${ROCM_VERSION_ARRAY[1]}
-    ROCM_VERSION_PATCH=${ROCM_VERSION_ARRAY[2]}
-else
-    echo "Unhandled ROCM_VERSION ${ROCM_VERSION}"
-    exit 1
-fi
-# major * 10000 + minor * 100 + patch, e.g. 6.2.1 -> 60201.
-ROCM_INT=$(($ROCM_VERSION_MAJOR * 10000 + $ROCM_VERSION_MINOR * 100 + $ROCM_VERSION_PATCH))
-
-# Function to retry functions that sometimes timeout or have flaky failures.
-# Runs the given command up to five times, sleeping 1, 2, 4 and 8 seconds
-# between attempts, and returns success as soon as one attempt succeeds.
-# "$@" (instead of unquoted $*) keeps every argument intact, so arguments
-# containing spaces or glob characters are passed through unchanged.
-retry () {
-    "$@" || (sleep 1 && "$@") || (sleep 2 && "$@") || (sleep 4 && "$@") || (sleep 8 && "$@")
-}
-
-# Build custom MIOpen to use comgr for offline compilation.
-
-## Need a sanitized ROCM_VERSION without patchlevel; patchlevel version 0 must be added to paths.
-# ROCM_DOTS counts the '.' characters in ROCM_VERSION.
-ROCM_DOTS=$(echo ${ROCM_VERSION} | tr -d -c '.' | wc -c)
-if [[ ${ROCM_DOTS} == 1 ]]; then
-    ROCM_VERSION_NOPATCH="${ROCM_VERSION}"
-    ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}.0"
-else
-    ROCM_VERSION_NOPATCH="${ROCM_VERSION%.*}"
-    ROCM_INSTALL_PATH="/opt/rocm-${ROCM_VERSION}"
-fi
-
-MIOPEN_CMAKE_COMMON_FLAGS="
-DMIOPEN_USE_COMGR=ON
-DMIOPEN_BUILD_DRIVER=OFF
-"
-# Only ROCm 6.2.0 up to (but excluding) 6.2.4 is rebuilt from source; every
-# other version exits successfully without doing anything further.
-if [[ $ROCM_INT -ge 60200 ]] && [[ $ROCM_INT -lt 60204 ]]; then
-    MIOPEN_BRANCH="release/rocm-rel-6.2-staging"
-else
-    echo "ROCm ${ROCM_VERSION} does not need any patches, do not build from source"
-    exit 0
-fi
-
-
-# Remove the packaged MIOpen before installing the custom build.
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get remove -y miopen-hip
-else
-  # Workaround since almalinux manylinux image already has this and cget doesn't like that
-  rm -rf /usr/local/lib/pkgconfig/sqlite3.pc
-
-  # Versioned package name needs regex match
-  # Use --noautoremove to prevent other rocm packages from being uninstalled
-  yum remove -y miopen-hip* --noautoremove
-fi
-
-git clone https://github.com/ROCm/MIOpen -b ${MIOPEN_BRANCH}
-pushd MIOpen
-# remove .git to save disk space since CI runner was running out
-rm -rf .git
-# Don't build CK to save docker build time
-sed -i '/composable_kernel/d' requirements.txt
-## MIOpen minimum requirements
-cmake -P install_deps.cmake --minimum
-
-# clean up since CI runner was running out of disk space
-rm -rf /tmp/*
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  apt-get autoclean && apt-get clean
-  rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-else
-  yum clean all
-  rm -rf /var/cache/yum
-  rm -rf /var/lib/yum/yumdb
-  rm -rf /var/lib/yum/history
-fi
-
-## Build MIOpen
-mkdir -p build
-cd build
-# NOTE(review): MIOPEN_CMAKE_DB_FLAGS is not assigned anywhere in this script,
-# so it expands to nothing unless the environment provides it.
-PKG_CONFIG_PATH=/usr/local/lib/pkgconfig CXX=${ROCM_INSTALL_PATH}/llvm/bin/clang++ cmake .. \
-    ${MIOPEN_CMAKE_COMMON_FLAGS} \
-    ${MIOPEN_CMAKE_DB_FLAGS} \
-    -DCMAKE_PREFIX_PATH="${ROCM_INSTALL_PATH}"
-make MIOpen -j $(nproc)
-
-# Build MIOpen package
-make -j $(nproc) package
-
-# clean up since CI runner was running out of disk space
-rm -rf /usr/local/cget
-
-# Install the package that was just built (deb on Ubuntu, rpm otherwise).
-if [[ ${IS_UBUNTU} == 1 ]]; then
-  sudo dpkg -i miopen-hip*.deb
-else
-  yum install -y miopen-*.rpm
-fi
-
-# NOTE(review): the directory stack was pushed in MIOpen but the script has
-# since changed into build/ with `cd`; popd returns to the directory that was
-# current before `pushd MIOpen`.
-popd
-rm -rf MIOpen
--- a/.ci/docker/common/install_mkl.sh
+++ b/.ci/docker/common/install_mkl.sh
@ -1,16 +0,0 @@
-#!/bin/bash
-set -ex
-
-# MKL
-# Download the mkl-static wheel (and, via pip's dependency resolution, the
-# mkl_include wheel unpacked below), unpack both, and move their lib/ and
-# include/ directories under MKLROOT.
-MKL_VERSION=2024.2.0
-
-MKLROOT=/opt/intel
-mkdir -p ${MKLROOT}
-# NOTE(review): there is no matching popd; the script ends inside /tmp.
-pushd /tmp
-
-python3 -mpip install wheel
-python3 -mpip download -d . mkl-static==${MKL_VERSION}
-# The wheel file names are hard-coded for the manylinux1 x86_64 platform tag.
-python3 -m wheel unpack mkl_static-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
-python3 -m wheel unpack mkl_include-${MKL_VERSION}-py2.py3-none-manylinux1_x86_64.whl
-mv mkl_static-${MKL_VERSION}/mkl_static-${MKL_VERSION}.data/data/lib ${MKLROOT}
-mv mkl_include-${MKL_VERSION}/mkl_include-${MKL_VERSION}.data/data/include ${MKLROOT}
--- a/.ci/docker/common/install_mnist.sh
+++ b/.ci/docker/common/install_mnist.sh
@ -1,13 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-set -ex
-
-# Fetch the four MNIST archives into /usr/local/mnist and decompress each one
-# in place.
-mnist_archives=(
-  train-images-idx3-ubyte.gz
-  train-labels-idx1-ubyte.gz
-  t10k-images-idx3-ubyte.gz
-  t10k-labels-idx1-ubyte.gz
-)
-
-mkdir -p /usr/local/mnist/
-cd /usr/local/mnist
-
-for archive in "${mnist_archives[@]}"; do
-  wget -q "https://ossci-datasets.s3.amazonaws.com/mnist/${archive}"
-  gzip -d "${archive}"
-done
--- a/.ci/docker/common/install_nccl.sh
+++ b/.ci/docker/common/install_nccl.sh
@ -1,28 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Choose the NCCL tag pinned for the CUDA major version, taken from the first
-# two characters of CUDA_VERSION. Any other major version is rejected.
-NCCL_VERSION=""
-case "${CUDA_VERSION:0:2}" in
-  11)
-    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu11.txt)
-    ;;
-  12)
-    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu12.txt)
-    ;;
-  13)
-    NCCL_VERSION=$(cat ci_commit_pins/nccl-cu13.txt)
-    ;;
-  *)
-    echo "Unexpected CUDA_VERSION ${CUDA_VERSION}"
-    exit 1
-    ;;
-esac
-
-# An empty pin file leaves NCCL_VERSION empty, in which case nothing is built.
-if [[ -n "${NCCL_VERSION}" ]]; then
-  # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
-  # Build steps follow https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
-  git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
-  pushd nccl
-  make -j src.build
-  # Copy the built headers and libraries into the CUDA installation.
-  cp -a build/include/* /usr/local/cuda/include/
-  cp -a build/lib/* /usr/local/cuda/lib64/
-  popd
-  rm -rf nccl
-  ldconfig
-fi
--- a/.ci/docker/common/install_ninja.sh
+++ b/.ci/docker/common/install_ninja.sh
@ -1,18 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# NINJA_VERSION is mandatory: under `set -e` an empty value stops the script here.
-test -n "$NINJA_VERSION"
-
-# aarch64 hosts get the dedicated aarch64 release asset; every other
-# architecture gets the default Linux zip.
-arch=$(uname -m)
-case "$arch" in
-    aarch64)
-        url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux-aarch64.zip"
-        ;;
-    *)
-        url="https://github.com/ninja-build/ninja/releases/download/v${NINJA_VERSION}/ninja-linux.zip"
-        ;;
-esac
-
-# Download to a fixed file name in /tmp, unpack the binary into
-# /usr/local/bin, and delete the archive.
-pushd /tmp
-wget --no-verbose --output-document=ninja-linux.zip "$url"
-unzip ninja-linux.zip -d /usr/local/bin
-rm -f ninja-linux.zip
-popd
--- a/.ci/docker/common/install_nvpl.sh
+++ b/.ci/docker/common/install_nvpl.sh
@ -1,20 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Download the NVPL BLAS and LAPACK archives for linux-sbsa and copy their
-# lib/ and include/ contents into /opt/nvpl.
-install_nvpl() {
-    local base_url="https://developer.download.nvidia.com/compute/nvpl/redist"
-    local spec component archive
-
-    mkdir -p /opt/nvpl/lib /opt/nvpl/include
-
-    # Each entry is <component>:<version>; BLAS is handled before LAPACK.
-    for spec in nvpl_blas:0.3.0 nvpl_lapack:0.2.3.1; do
-        component="${spec%%:*}"
-        archive="${component}-linux-sbsa-${spec##*:}-archive"
-
-        wget "${base_url}/${component}/linux-sbsa/${archive}.tar.xz"
-        tar xf "${archive}.tar.xz"
-        cp -r "${archive}"/lib/* /opt/nvpl/lib/
-        cp -r "${archive}"/include/* /opt/nvpl/include/
-    done
-}
-
-install_nvpl
--- a/.ci/docker/common/install_onnx.sh
+++ b/.ci/docker/common/install_onnx.sh
@ -1,38 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Run the given command up to four times, sleeping 10, 20 and 40 seconds
-# between attempts; succeeds as soon as one attempt succeeds.
-# NOTE(review): retry is not called anywhere in the visible part of this script.
-retry () {
-    "$@" || (sleep 10 && "$@") || (sleep 20 && "$@") || (sleep 40 && "$@")
-}
-
-# ONNXRuntime should be installed before installing
-# onnx-weekly. Otherwise, onnx-weekly could be
-# overwritten by onnx.
-# NOTE(review): onnx-weekly is not installed anywhere in the visible part of
-# this script; the note above may be outdated.
-pip_install \
-  parameterized==0.8.1 \
-  pytest-cov==4.0.0 \
-  pytest-subtests==0.10.0 \
-  tabulate==0.9.0 \
-  transformers==4.36.2
-
-pip_install coloredlogs packaging
-pip_install onnxruntime==1.22.1
-pip_install onnxscript==0.4.0
-
-# Cache the transformers model to be used later by ONNX tests. We need to run the transformers
-# package to download the model. By default, the model is cached at ~/.cache/huggingface/hub/
-IMPORT_SCRIPT_FILENAME="/tmp/onnx_import_script.py"
-# The output redirection is performed by this shell, not by as_jenkins, so the
-# file is created by whichever user runs this script.
-as_jenkins echo 'import transformers; transformers.GPTJForCausalLM.from_pretrained("hf-internal-testing/tiny-random-gptj");' > "${IMPORT_SCRIPT_FILENAME}"
-
-# Need a PyTorch version for transformers to work
-pip_install --pre torch --index-url https://download.pytorch.org/whl/nightly/cpu
-# Very weird quoting behavior here https://github.com/conda/conda/issues/10972,
-# so echo the command to a file and run the file instead
-conda_run python "${IMPORT_SCRIPT_FILENAME}"
-
-# Cleaning up
-# torch was only needed to run the import script above.
-conda_run pip uninstall -y torch
-rm "${IMPORT_SCRIPT_FILENAME}" || true
--- a/.ci/docker/common/install_openblas.sh
+++ b/.ci/docker/common/install_openblas.sh
@ -1,21 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-set -ex
-
-# Clone OpenBLAS at OPENBLAS_VERSION (default v0.3.30) into /OpenBLAS, then
-# build and install it with the flags listed below.
-cd /
-git clone https://github.com/OpenMathLib/OpenBLAS.git -b "${OPENBLAS_VERSION:-v0.3.30}" --depth 1 --shallow-submodules
-
-OPENBLAS_CHECKOUT_DIR="OpenBLAS"
-# One make variable per line; the value is expanded unquoted below so that
-# each line becomes a separate make argument.
-OPENBLAS_BUILD_FLAGS="
-NUM_THREADS=128
-USE_OPENMP=1
-NO_SHARED=0
-DYNAMIC_ARCH=1
-TARGET=ARMV8
-CFLAGS=-O3
-BUILD_BFLOAT16=1
-"
-
-# The same flags are passed to the build and to the install target.
-make -j8 ${OPENBLAS_BUILD_FLAGS} -C ${OPENBLAS_CHECKOUT_DIR}
-make -j8 ${OPENBLAS_BUILD_FLAGS} install -C ${OPENBLAS_CHECKOUT_DIR}
--- a/.ci/docker/common/install_openssl.sh
+++ b/.ci/docker/common/install_openssl.sh
@ -1,17 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Download the OpenSSL source tarball, build it with prefix /opt/openssl,
-# install the software (install_sw target), and link the resulting
-# libraries into /usr/lib.
-OPENSSL=openssl-1.1.1k
-
-wget -q -O "${OPENSSL}.tar.gz" "https://ossci-linux.s3.amazonaws.com/${OPENSSL}.tar.gz"
-tar xf "${OPENSSL}.tar.gz"
-cd "${OPENSSL}"
-./config --prefix=/opt/openssl -d '-Wl,--enable-new-dtags,-rpath,$(LIBRPATH)'
-# Build with two fewer jobs than there are CPUs, but never fewer than one:
-# `make -j` rejects zero or negative job counts, which the plain subtraction
-# would produce on hosts with one or two CPUs.
-NPROC=$(( $(nproc) - 2 ))
-if [ "${NPROC}" -lt 1 ]; then
-  NPROC=1
-fi
-make -j"${NPROC}"
-# NOTE: openssl install errors out when built with the -j option
-make install_sw
-# Link the ssl libraries to the /usr/lib folder.
-sudo ln -s /opt/openssl/lib/lib* /usr/lib
-cd ..
-rm -rf "${OPENSSL}"
--- a/.ci/docker/common/install_patchelf.sh
+++ b/.ci/docker/common/install_patchelf.sh
@ -1,16 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-set -ex
-
-# Build patchelf from source and install it with the default configure prefix.
-
-# Pin the version to latest release 0.17.2, building newer commit starts
-# to fail on the current image
-git clone -b 0.17.2 --single-branch https://github.com/NixOS/patchelf
-cd patchelf
-# Replace every occurrence of "serial" with "parallel" in configure.ac before
-# bootstrapping (presumably to switch the automake test harness -- TODO confirm).
-sed -i 's/serial/parallel/g' configure.ac
-./bootstrap.sh
-./configure
-make
-make install
-cd ..
-rm -rf patchelf
--- a/.ci/docker/common/install_python.sh
+++ b/.ci/docker/common/install_python.sh
@ -1,15 +0,0 @@
-#!/bin/bash
-set -ex
-
-# Install python${PYTHON_VERSION} from apt, make it the default `python`, and
-# create a virtual environment with the CI requirements installed.
-apt-get update
-# Use deadsnakes in case we need an older python version
-# NOTE(review): the package lists are refreshed before the PPA is added and
-# not again afterwards in this script; whether add-apt-repository refreshes
-# them itself depends on its version -- confirm.
-sudo add-apt-repository ppa:deadsnakes/ppa
-apt-get install -y python${PYTHON_VERSION} python${PYTHON_VERSION}-dev python3-pip python${PYTHON_VERSION}-venv
-
-# Use a venv because uv and some other package managers don't support --user install
-ln -s /usr/bin/python${PYTHON_VERSION} /usr/bin/python
-python -m venv /var/lib/jenkins/ci_env
-# Activate the venv so the pip commands below install into it.
-source /var/lib/jenkins/ci_env/bin/activate
-
-python -mpip install --upgrade pip
-python -mpip install -r /opt/requirements-ci.txt
--- a/.ci/docker/common/install_rocm.sh
+++ b/.ci/docker/common/install_rocm.sh
@ -1,208 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# for pip_install function
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)"
-
-# Print a dotted version string as one fixed-width number so that versions can
-# be compared with integer operators (-ge, -lt, -eq), as done throughout this
-# script. Dots are turned into spaces and the up-to-four components are
-# formatted with "%3d%03d%03d%03d".
-ver() {
-    printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
-}
-
-# Install ROCm on Ubuntu: configure the repo.radeon.com apt repositories,
-# install the ROCm packages and precompiled MIOpen kernels, rebuild the HIP
-# runtime for ROCm 6.4.x, and install composable_kernel at the pinned commit.
-# Reads ROCM_VERSION, UBUNTU_VERSION and ANACONDA_PYTHON_VERSION.
-install_ubuntu() {
-    apt-get update
-    # gpg-agent is not available by default
-    apt-get install -y --no-install-recommends gpg-agent
-    # NOTE(review): the same pin file is written again unconditionally by the
-    # heredoc further down.
-    if [[ $(ver $UBUNTU_VERSION) -ge $(ver 22.04) ]]; then
-        echo -e 'Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600' \
-            | sudo tee /etc/apt/preferences.d/rocm-pin-600
-    fi
-    apt-get install -y kmod
-    apt-get install -y wget
-
-    # Need the libc++1 and libc++abi1 libraries to allow torch._C to load at runtime
-    apt-get install -y libc++1
-    apt-get install -y libc++abi1
-
-    # Make sure rocm packages from repo.radeon.com have highest priority
-    cat << EOF > /etc/apt/preferences.d/rocm-pin-600
-Package: *
-Pin: release o=repo.radeon.com
-Pin-Priority: 600
-EOF
-
-    # we want the patch version of 6.4 instead
-    if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
-        ROCM_VERSION="${ROCM_VERSION}.2"
-    fi
-
-    # Default url values
-    rocm_baseurl="http://repo.radeon.com/rocm/apt/${ROCM_VERSION}"
-    amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/ubuntu"
-
-    # Special case for ROCM_VERSION == 7.0
-    if [[ $(ver "$ROCM_VERSION") -eq $(ver 7.0) ]]; then
-        rocm_baseurl="https://repo.radeon.com/rocm/apt/7.0_alpha2"
-        amdgpu_baseurl="https://repo.radeon.com/amdgpu/30.10_alpha2/ubuntu"
-    fi
-
-    # Add amdgpu repository
-    UBUNTU_VERSION_NAME=`cat /etc/os-release | grep UBUNTU_CODENAME | awk -F= '{print $2}'`
-    echo "deb [arch=amd64] ${amdgpu_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/amdgpu.list
-
-    # Add rocm repository
-    wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
-    echo "deb [arch=amd64] ${rocm_baseurl} ${UBUNTU_VERSION_NAME} main" > /etc/apt/sources.list.d/rocm.list
-    # NOTE(review): repository and package authentication is relaxed here and
-    # in the installs below (--allow-insecure-repositories / --allow-unauthenticated).
-    apt-get update --allow-insecure-repositories
-
-    DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
-                   rocm-dev \
-                   rocm-utils \
-                   rocm-libs \
-                   rccl \
-                   rocprofiler-dev \
-                   roctracer-dev \
-                   amd-smi-lib
-
-    # rocm-llvm-dev is installed only for ROCm 6.1 and newer.
-    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.1) ]]; then
-        DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated rocm-llvm-dev
-    fi
-
-    # precompiled miopen kernels added in ROCm 3.5, renamed in ROCm 5.5
-    # search for all unversioned packages
-    # if search fails it will abort this script; use true to avoid case where search fails
-    # `grep -F -v .` keeps only package names that contain no dot.
-    MIOPENHIPGFX=$(apt-cache search --names-only miopen-hip-gfx | awk '{print $1}' | grep -F -v . || true)
-    if [[ "x${MIOPENHIPGFX}" = x ]]; then
-      echo "miopen-hip-gfx package not available" && exit 1
-    else
-      DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated ${MIOPENHIPGFX}
-    fi
-
-    # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
-    for kdb in /opt/rocm/share/miopen/db/*.kdb
-    do
-        sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
-    done
-
-    # ROCm 6.3 had a regression where initializing static code objects had significant overhead
-    # CI no longer builds for ROCm 6.3, but
-    # ROCm 6.4 did not yet fix the regression, also HIP branch names are different
-    if [[ $(ver $ROCM_VERSION) -ge $(ver 6.4) ]] && [[ $(ver $ROCM_VERSION) -lt $(ver 7.0) ]]; then
-        # NOTE(review): only 6.4, 6.4.1 and 6.4.2 set HIP_TAG and CLR_HASH; any
-        # other version in the [6.4, 7.0) range reaches the clone below with
-        # both unset.
-        if [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.2) ]]; then
-            HIP_TAG=rocm-6.4.2
-            CLR_HASH=74d78ba3ac4bac235d02bcb48511c30b5cfdd457  # branch release/rocm-rel-6.4.2-statco-hotfix
-        elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4.1) ]]; then
-            HIP_TAG=rocm-6.4.1
-            CLR_HASH=efe6c35790b9206923bfeed1209902feff37f386  # branch release/rocm-rel-6.4.1-statco-hotfix
-        elif [[ $(ver $ROCM_VERSION) -eq $(ver 6.4) ]]; then
-            HIP_TAG=rocm-6.4.0
-            CLR_HASH=600f5b0d2baed94d5121e2174a9de0851b040b0c  # branch release/rocm-rel-6.4-statco-hotfix
-        fi
-        # clr build needs CppHeaderParser but can only find it using conda's python
-        python -m pip install CppHeaderParser
-        git clone https://github.com/ROCm/HIP -b $HIP_TAG
-        HIP_COMMON_DIR=$(readlink -f HIP)
-        git clone https://github.com/jeffdaily/clr
-        pushd clr
-        git checkout $CLR_HASH
-        popd
-        mkdir -p clr/build
-        pushd clr/build
-        # Need to point CMake to the correct python installation to find CppHeaderParser
-        cmake .. -DPython3_EXECUTABLE=/opt/conda/envs/py_${ANACONDA_PYTHON_VERSION}/bin/python3 -DCLR_BUILD_HIP=ON -DHIP_COMMON_DIR=$HIP_COMMON_DIR
-        make -j
-        # Both source and destination are globs: this overwrites the installed
-        # library under its existing file name and assumes each glob matches
-        # exactly one file.
-        cp hipamd/lib/libamdhip64.so.6.4.* /opt/rocm/lib/libamdhip64.so.6.4.*
-        popd
-        rm -rf HIP clr
-    fi
-
-    pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
-
-    # Cleanup
-    apt-get autoclean && apt-get clean
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-}
-
-# Install ROCm on CentOS: write the AMDGPU and ROCm yum repository files,
-# install the ROCm packages and precompiled MIOpen kernels, and install
-# composable_kernel at the pinned commit. Reads ROCM_VERSION and OS_VERSION.
-install_centos() {
-
-  yum update -y
-  yum install -y kmod
-  yum install -y wget
-  yum install -y openblas-devel
-
-  yum install -y epel-release
-  # Kernel headers and devel packages are matched to the running kernel (`uname -r`).
-  yum install -y dkms kernel-headers-`uname -r` kernel-devel-`uname -r`
-
-  # Add amdgpu repository
-  # OS_VERSION 9 uses the rhel/9.0 path; everything else uses rhel/7.9.
-  local amdgpu_baseurl
-  if [[ $OS_VERSION == 9 ]]; then
-      amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/9.0/main/x86_64"
-  else
-      amdgpu_baseurl="https://repo.radeon.com/amdgpu/${ROCM_VERSION}/rhel/7.9/main/x86_64"
-  fi
-  echo "[AMDGPU]" > /etc/yum.repos.d/amdgpu.repo
-  echo "name=AMDGPU" >> /etc/yum.repos.d/amdgpu.repo
-  echo "baseurl=${amdgpu_baseurl}" >> /etc/yum.repos.d/amdgpu.repo
-  echo "enabled=1" >> /etc/yum.repos.d/amdgpu.repo
-  echo "gpgcheck=1" >> /etc/yum.repos.d/amdgpu.repo
-  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/amdgpu.repo
-
-  # Add the ROCm repository, using the same signing key as above.
-  local rocm_baseurl="http://repo.radeon.com/rocm/yum/${ROCM_VERSION}"
-  echo "[ROCm]" > /etc/yum.repos.d/rocm.repo
-  echo "name=ROCm" >> /etc/yum.repos.d/rocm.repo
-  echo "baseurl=${rocm_baseurl}" >> /etc/yum.repos.d/rocm.repo
-  echo "enabled=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgcheck=1" >> /etc/yum.repos.d/rocm.repo
-  echo "gpgkey=http://repo.radeon.com/rocm/rocm.gpg.key" >> /etc/yum.repos.d/rocm.repo
-
-  yum update -y
-
-  yum install -y \
-                   rocm-dev \
-                   rocm-utils \
-                   rocm-libs \
-                   rccl \
-                   rocprofiler-dev \
-                   roctracer-dev \
-                   amd-smi-lib
-
-  # precompiled miopen kernels; search for all unversioned packages
-  # if search fails it will abort this script; use true to avoid case where search fails
-  # The final grep keeps only names that contain the literal text "kdb.".
-  MIOPENHIPGFX=$(yum -q search miopen-hip-gfx | grep miopen-hip-gfx | awk '{print $1}'| grep -F kdb. || true)
-  if [[ "x${MIOPENHIPGFX}" = x ]]; then
-    echo "miopen-hip-gfx package not available" && exit 1
-  else
-    yum install -y ${MIOPENHIPGFX}
-  fi
-
-  # ROCm 6.0 had a regression where journal_mode was enabled on the kdb files resulting in permission errors at runtime
-  for kdb in /opt/rocm/share/miopen/db/*.kdb
-  do
-      sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
-  done
-
-  pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
-
-  # Cleanup
-  yum clean all
-  rm -rf /var/cache/yum
-  rm -rf /var/lib/yum/yumdb
-  rm -rf /var/lib/yum/history
-}
-
-# Run the ROCm installer that matches the distribution ID reported by
-# /etc/os-release; any other distribution is an error.
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-if [[ "$ID" == "ubuntu" ]]; then
-  install_ubuntu
-elif [[ "$ID" == "centos" ]]; then
-  install_centos
-else
-  echo "Unable to determine OS..."
-  exit 1
-fi
--- a/.ci/docker/common/install_rocm_drm.sh
+++ b/.ci/docker/common/install_rocm_drm.sh
@ -1,150 +0,0 @@
-#!/bin/bash
-# Script used only in CD pipeline
-
-# NOTE(review): unlike the sibling install scripts, this script does not
-# enable `set -e`, so a failing step does not stop the steps after it.
-###########################
-### prereqs
-###########################
-# Install build prerequisites (libpciaccess headers, pkg-config) depending on the base OS
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-  ubuntu)
-    apt-get update -y
-    apt-get install -y libpciaccess-dev pkg-config
-    apt-get clean
-    ;;
-  centos|almalinux)
-    yum install -y libpciaccess-devel pkgconfig
-    ;;
-  *)
-    echo "Unable to determine OS..."
-    exit 1
-    ;;
-esac
-# meson and ninja are the build tools used in the build section of this script.
-python3 -m pip install meson ninja
-
-###########################
-### clone repo
-###########################
-# NOTE(review): TLS certificate verification is disabled for this clone, and
-# no branch, tag or commit is pinned.
-GIT_SSL_NO_VERIFY=true git clone https://gitlab.freedesktop.org/mesa/drm.git
-pushd drm
-
-###########################
-### patch
-###########################
-patch -p1 <<'EOF'
-diff --git a/amdgpu/amdgpu_asic_id.c b/amdgpu/amdgpu_asic_id.c
-index a5007ffc..13fa07fc 100644
--- a/amdgpu/amdgpu_asic_id.c
-+++ b/amdgpu/amdgpu_asic_id.c
-@@ -22,6 +22,13 @@
-  *
-  */
-
-+#define _XOPEN_SOURCE 700
-+#define _LARGEFILE64_SOURCE
-+#define _FILE_OFFSET_BITS 64
-+#include <ftw.h>
-+#include <link.h>
-+#include <limits.h>
-+
- #include <ctype.h>
- #include <stdio.h>
- #include <stdlib.h>
-@@ -34,6 +41,19 @@
- #include "amdgpu_drm.h"
- #include "amdgpu_internal.h"
-
-+static char *amdgpuids_path = NULL;
-+static const char* amdgpuids_path_msg = NULL;
-+
-+static int check_for_location_of_amdgpuids(const char *filepath, const struct stat *info, const int typeflag, struct FTW *pathinfo)
-+{
-+	if (typeflag == FTW_F && strstr(filepath, "amdgpu.ids")) {
-+		amdgpuids_path = strdup(filepath);
-+		return 1;
-+	}
-+
-+	return 0;
-+}
-+
- static int parse_one_line(struct amdgpu_device *dev, const char *line)
- {
- 	char *buf, *saveptr;
-@@ -113,10 +133,46 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
- 	int line_num = 1;
- 	int r = 0;
-
-+	// attempt to find typical location for amdgpu.ids file
- 	fp = fopen(AMDGPU_ASIC_ID_TABLE, "r");
-+
-+	// if it doesn't exist, search
-+	if (!fp) {
-+
-+	char self_path[ PATH_MAX ];
-+	ssize_t count;
-+	ssize_t i;
-+
-+	count = readlink( "/proc/self/exe", self_path, PATH_MAX );
-+	if (count > 0) {
-+		self_path[count] = '\0';
-+
-+		// remove '/bin/python' from self_path
-+		for (i=count; i>0; --i) {
-+			if (self_path[i] == '/') break;
-+			self_path[i] = '\0';
-+		}
-+		self_path[i] = '\0';
-+		for (; i>0; --i) {
-+			if (self_path[i] == '/') break;
-+			self_path[i] = '\0';
-+		}
-+		self_path[i] = '\0';
-+
-+		if (1 == nftw(self_path, check_for_location_of_amdgpuids, 5, FTW_PHYS)) {
-+			fp = fopen(amdgpuids_path, "r");
-+			amdgpuids_path_msg = amdgpuids_path;
-+		}
-+	}
-+
-+	}
-+	else {
-+		amdgpuids_path_msg = AMDGPU_ASIC_ID_TABLE;
-+	}
-+
-+	// both hard-coded location and search have failed
- 	if (!fp) {
-		fprintf(stderr, "%s: %s\n", AMDGPU_ASIC_ID_TABLE,
-			strerror(errno));
-+		//fprintf(stderr, "amdgpu.ids: No such file or directory\n");
- 		return;
- 	}
-
-@@ -132,7 +188,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
- 			continue;
- 		}
-
-		drmMsg("%s version: %s\n", AMDGPU_ASIC_ID_TABLE, line);
-+		drmMsg("%s version: %s\n", amdgpuids_path_msg, line);
- 		break;
- 	}
-
-@@ -150,7 +206,7 @@ void amdgpu_parse_asic_ids(struct amdgpu_device *dev)
-
- 	if (r == -EINVAL) {
- 		fprintf(stderr, "Invalid format: %s: line %d: %s\n",
-			AMDGPU_ASIC_ID_TABLE, line_num, line);
-+			amdgpuids_path_msg, line_num, line);
- 	} else if (r && r != -EAGAIN) {
- 		fprintf(stderr, "%s: Cannot parse ASIC IDs: %s\n",
- 			__func__, strerror(-r));
-EOF
-
-###########################
-### build
-###########################
-# Configure the drm tree with meson using prefix /opt/amdgpu, then build and
-# install it with ninja.
-meson builddir --prefix=/opt/amdgpu
-pushd builddir
-ninja install
-
-# Leave builddir, then leave the drm checkout entered earlier in the script.
-popd
-popd
--- a/.ci/docker/common/install_rocm_magma.sh
+++ b/.ci/docker/common/install_rocm_magma.sh
@ -1,37 +0,0 @@
-#!/usr/bin/env bash
-# Script used only in CD pipeline
-
-set -eou pipefail
-
-# Download the prebuilt MAGMA archive matching the given ROCm version and
-# move its include/ and lib/ directories into /opt/rocm/magma.
-#   $1: ROCm version; a trailing patch component (X.Y.Z) is dropped to X.Y.
-# An archive that cannot be extracted is reported and skipped, not fatal.
-function do_install() {
-    rocm_version=$1
-    if [[ ${rocm_version} =~ ^[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
-        # chop off any patch version
-        rocm_version="${rocm_version%.*}"
-    fi
-
-    rocm_version_nodot=${rocm_version//./}
-
-    # Version 2.7.2 + ROCm related updates
-    MAGMA_VERSION=a1625ff4d9bc362906bd01f805dbbe12612953f6
-    magma_archive="magma-rocm${rocm_version_nodot}-${MAGMA_VERSION}-1.tar.bz2"
-
-    rocm_dir="/opt/rocm"
-    # Subshell: keeps `set -x` and the directory change local to the download
-    (
-        set -x
-        tmp_dir=$(mktemp -d)
-        pushd "${tmp_dir}"
-        curl -OLs "https://ossci-linux.s3.us-east-1.amazonaws.com/${magma_archive}"
-        if tar -xvf "${magma_archive}"; then
-            mkdir -p "${rocm_dir}/magma"
-            mv include "${rocm_dir}/magma/include"
-            mv lib "${rocm_dir}/magma/lib"
-        else
-            echo "${magma_archive} not found, skipping magma install"
-        fi
-        popd
-        # Remove the scratch directory together with the downloaded archive
-        rm -rf "${tmp_dir}"
-    )
-}
-
-do_install "$1"
--- a/.ci/docker/common/install_triton.sh
+++ b/.ci/docker/common/install_triton.sh
@ -1,107 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-mkdir -p /opt/triton
-if [ -z "${TRITON}" ] && [ -z "${TRITON_CPU}" ]; then
-  echo "TRITON and TRITON_CPU are not set. Exiting..."
-  exit 0
-fi
-
-source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
-
-# Print the second column (the version) of the first `pip list` line that
-# contains the given name as a whole word; prints nothing when no line matches.
-# `pip list` is run through conda_run (presumably provided by the sourced
-# common_utils.sh -- verify there).
-get_pip_version() {
-  conda_run pip list | grep -w $* | head -n 1 | awk '{print $2}'
-}
-
-if [ -n "${XPU_VERSION}" ]; then
-  TRITON_REPO="https://github.com/intel/intel-xpu-backend-for-triton"
-  TRITON_TEXT_FILE="triton-xpu"
-elif [ -n "${TRITON_CPU}" ]; then
-  TRITON_REPO="https://github.com/triton-lang/triton-cpu"
-  TRITON_TEXT_FILE="triton-cpu"
-else
-  TRITON_REPO="https://github.com/triton-lang/triton"
-  TRITON_TEXT_FILE="triton"
-fi
-
-# The logic here is copied from .ci/pytorch/common_utils.sh
-TRITON_PINNED_COMMIT=$(get_pinned_commit ${TRITON_TEXT_FILE})
-
-if [ -n "${UBUNTU_VERSION}" ];then
-    apt update
-    apt-get install -y gpg-agent
-fi
-
-# Keep the current cmake and numpy version here, so we can reinstall them later
-CMAKE_VERSION=$(get_pip_version cmake)
-NUMPY_VERSION=$(get_pip_version numpy)
-
-if [ -z "${MAX_JOBS}" ]; then
-    export MAX_JOBS=$(nproc)
-fi
-
-# Git checkout triton
-mkdir /var/lib/jenkins/triton
-chown -R jenkins /var/lib/jenkins/triton
-chgrp -R jenkins /var/lib/jenkins/triton
-pushd /var/lib/jenkins/
-
-as_jenkins git clone --recursive ${TRITON_REPO} triton
-cd triton
-as_jenkins git checkout ${TRITON_PINNED_COMMIT}
-as_jenkins git submodule update --init --recursive
-
-# Old versions of triton have setup.py in ./python; newer versions have it in ./
-if [ ! -f setup.py ]; then
-  cd python
-fi
-
-pip_install pybind11==3.0.1
-
-# TODO: remove patch setup.py once we have a proper fix for https://github.com/triton-lang/triton/issues/4527
-as_jenkins sed -i -e 's/https:\/\/tritonlang.blob.core.windows.net\/llvm-builds/https:\/\/oaitriton.blob.core.windows.net\/public\/llvm-builds/g' setup.py
-
-if [ -n "${UBUNTU_VERSION}" ] && [ -n "${GCC_VERSION}" ] && [[ "${GCC_VERSION}" == "7" ]]; then
-  # Triton needs at least gcc-9 to build
-  apt-get install -y g++-9
-
-  CXX=g++-9 conda_run python setup.py bdist_wheel
-elif [ -n "${UBUNTU_VERSION}" ] && [ -n "${CLANG_VERSION}" ]; then
-  # Triton needs <filesystem> which surprisingly is not available with clang-9 toolchain
-  add-apt-repository -y ppa:ubuntu-toolchain-r/test
-  apt-get install -y g++-9
-
-  CXX=g++-9 conda_run python setup.py bdist_wheel
-else
-  conda_run python setup.py bdist_wheel
-fi
-
-# Copy the wheel to /opt for multi stage docker builds
-cp dist/*.whl /opt/triton
-# Install the wheel for docker builds that don't use multi stage
-pip_install dist/*.whl
-
-# TODO: This is to make sure that the same cmake and numpy version from install conda
-# script is used. Without this step, the newer cmake version (3.25.2) downloaded by
-# triton build step via pip will fail to detect conda MKL. Once that issue is fixed,
-# this can be removed.
-#
-# The correct numpy version also needs to be set here because conda claims that it
-# causes inconsistent environment.  Without this, conda will attempt to install the
-# latest numpy version, which fails ASAN tests with the following import error: Numba
-# needs NumPy 1.20 or less.
-# Note that we install numpy with pip as conda might not have the version we want
-if [ -n "${CMAKE_VERSION}" ]; then
-  pip_install "cmake==${CMAKE_VERSION}"
-fi
-if [ -n "${NUMPY_VERSION}" ]; then
-  pip_install "numpy==${NUMPY_VERSION}"
-fi
-
-# IMPORTANT: helion needs to be installed without dependencies.
-# It depends on torch and triton. We don't want to install
-# triton and torch from production on Docker CI images
-if [[ "$ANACONDA_PYTHON_VERSION" != 3.9* ]]; then
-  pip_install helion --no-deps
-fi
--- a/.ci/docker/common/install_ucc.sh
+++ b/.ci/docker/common/install_ucc.sh
@ -1,81 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-if [[ -d "/usr/local/cuda/" ]];  then
-  with_cuda=/usr/local/cuda/
-else
-  with_cuda=no
-fi
-
-if [[ -d "/opt/rocm" ]]; then
-  with_rocm=/opt/rocm
-else
-  with_rocm=no
-fi
-
-# Build UCX from source at the commit named by UCX_COMMIT and install it under
-# UCX_HOME. CUDA/ROCm support is passed through with_cuda / with_rocm, which
-# hold either a toolkit directory or "no" (set at the top of this script).
-function install_ucx() {
-  set -ex
-  git clone --recursive https://github.com/openucx/ucx.git
-  pushd ucx
-  git checkout ${UCX_COMMIT}
-  git submodule update --init --recursive
-
-  ./autogen.sh
-  ./configure --prefix=$UCX_HOME      \
-      --enable-mt                     \
-      --with-cuda=$with_cuda          \
-      --with-rocm=$with_rocm          \
-      --enable-profiling              \
-      --enable-stats
-  time make -j
-  sudo make install
-
-  # The source checkout is only needed for the build; remove it afterwards
-  popd
-  rm -rf ucx
-}
-
-# Build UCC from source at the commit named by UCC_COMMIT and install it under
-# UCC_HOME, linking against the UCX install in UCX_HOME. The NVCC gencode flags
-# and the ROCm offload architectures handed to configure are chosen below.
-function install_ucc() {
-  set -ex
-  git clone --recursive https://github.com/openucx/ucc.git
-  pushd ucc
-  git checkout ${UCC_COMMIT}
-  git submodule update --init --recursive
-
-  ./autogen.sh
-
-  # CUDA 13.x builds get only the compute_86 target; everything else also gets sm_52
-  if [[ -n "$CUDA_VERSION"  && $CUDA_VERSION == 13* ]]; then
-    NVCC_GENCODE="-gencode=arch=compute_86,code=compute_86"
-  else
-    # We only run distributed tests on Tesla M60 and A10G
-    NVCC_GENCODE="-gencode=arch=compute_52,code=sm_52 -gencode=arch=compute_86,code=compute_86"
-  fi
-
-  # With ROCM_VERSION set, build one --offload-arch flag per target: targets
-  # come from PYTORCH_ROCM_ARCH (semicolon-separated) when set, otherwise from
-  # rocm_agent_enumerator with gfx000 filtered out. Without ROCM_VERSION a
-  # fixed placeholder value is passed instead.
-  if [[ -n "$ROCM_VERSION" ]]; then
-    if [[ -n "$PYTORCH_ROCM_ARCH" ]]; then
-      amdgpu_targets=`echo $PYTORCH_ROCM_ARCH | sed 's/;/ /g'`
-    else
-      amdgpu_targets=`rocm_agent_enumerator | grep -v gfx000 | sort -u | xargs`
-    fi
-    # NOTE(review): flags are appended to whatever HIP_OFFLOAD already holds;
-    # it is not reset here -- confirm it is expected to start out empty.
-    for arch in $amdgpu_targets; do
-      HIP_OFFLOAD="$HIP_OFFLOAD --offload-arch=$arch"
-    done
-  else
-    HIP_OFFLOAD="all-arch-no-native"
-  fi
-
-  ./configure --prefix=$UCC_HOME          \
-    --with-ucx=$UCX_HOME                  \
-    --with-cuda=$with_cuda                \
-    --with-nvcc-gencode="${NVCC_GENCODE}" \
-    --with-rocm=$with_rocm                \
-    --with-rocm-arch="${HIP_OFFLOAD}"
-  time make -j
-  sudo make install
-
-  # The source checkout is only needed for the build; remove it afterwards
-  popd
-  rm -rf ucc
-}
-
-install_ucx
-install_ucc
--- a/.ci/docker/common/install_user.sh
+++ b/.ci/docker/common/install_user.sh
@ -1,40 +0,0 @@
-#!/bin/bash
-
-set -ex
-
-# Since version 24 the system ships with user 'ubuntu' that has id 1000
-# We need a work-around to enable id 1000 usage for this script
-if [[ $UBUNTU_VERSION == 24.04 ]]; then
-    # touch is used to disable harmless error message
-    touch /var/mail/ubuntu && chown ubuntu /var/mail/ubuntu && userdel -r ubuntu
-fi
-
-# Mirror jenkins user in container
-# jenkins user as ec2-user should have the same user-id
-echo "jenkins:x:1000:1000::/var/lib/jenkins:" >> /etc/passwd
-echo "jenkins:x:1000:" >> /etc/group
-# Needed on focal or newer
-echo "jenkins:*:19110:0:99999:7:::" >>/etc/shadow
-
-# Create $HOME
-mkdir -p /var/lib/jenkins
-chown jenkins:jenkins /var/lib/jenkins
-mkdir -p /var/lib/jenkins/.ccache
-chown jenkins:jenkins /var/lib/jenkins/.ccache
-
-# Allow writing to /usr/local (for make install)
-chown jenkins:jenkins /usr/local
-
-# Allow sudo
-# TODO: Maybe we shouldn't
-echo 'jenkins ALL=(ALL) NOPASSWD:ALL' > /etc/sudoers.d/jenkins
-
-# Work around bug where devtoolset replaces sudo and breaks it.
-if [ -n "$DEVTOOLSET_VERSION" ]; then
-  SUDO=/bin/sudo
-else
-  SUDO=sudo
-fi
-
-# Test that sudo works
-$SUDO -u jenkins $SUDO -v
--- a/.ci/docker/common/install_xpu.sh
+++ b/.ci/docker/common/install_xpu.sh
@ -1,176 +0,0 @@
-#!/bin/bash
-set -xe
-# Script used in CI and CD pipeline
-
-# Intel® software for general purpose GPU capabilities.
-# Refer to https://www.intel.com/content/www/us/en/developer/articles/tool/pytorch-prerequisites-for-intel-gpus.html
-
-# Users should update to the latest version as it becomes available
-
-# Install the Intel GPU driver packages, xpu-smi and the Intel support
-# packages on Ubuntu. Only the "jammy" release is accepted.
-# Reads XPU_DRIVER_TYPE ("lts" selects the LTS package set, anything else the
-# rolling set), XPU_DRIVER_VERSION (repository suffix) and XPU_PACKAGES.
-function install_ubuntu() {
-    . /etc/os-release
-    if [[ ! " jammy " =~ " ${VERSION_CODENAME} " ]]; then
-        echo "Ubuntu version ${VERSION_CODENAME} not supported"
-        # A bare `exit` would return echo's status (0) and hide the failure
-        exit 1
-    fi
-
-    apt-get update -y
-    apt-get install -y gpg-agent wget
-    # To add the online network package repository for the GPU Driver
-    wget -qO - https://repositories.intel.com/gpu/intel-graphics.key \
-        | gpg --yes --dearmor --output /usr/share/keyrings/intel-graphics.gpg
-    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] \
-        https://repositories.intel.com/gpu/ubuntu ${VERSION_CODENAME}${XPU_DRIVER_VERSION} unified" \
-        | tee /etc/apt/sources.list.d/intel-gpu-${VERSION_CODENAME}.list
-    # To add the online network package repository for the Intel Support Packages
-    wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
-        | gpg --dearmor > /usr/share/keyrings/oneapi-archive-keyring.gpg.gpg
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg.gpg] \
-        https://apt.repos.intel.com/oneapi all main" \
-        | tee /etc/apt/sources.list.d/oneAPI.list
-
-    # Update the packages list and repository index
-    apt-get update
-
-    # The xpu-smi packages
-    apt-get install -y flex bison xpu-smi
-
-    if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
-        # Compute and Media Runtimes
-        apt-get install -y \
-            intel-opencl-icd intel-level-zero-gpu level-zero \
-            intel-media-va-driver-non-free libmfx1 libmfxgen1 libvpl2 \
-            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
-            libglapi-mesa libgles2-mesa-dev libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
-            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo
-        # Development Packages
-        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev level-zero-dev
-    else # rolling driver
-        apt-get install -y \
-            intel-opencl-icd libze-intel-gpu1 libze1 \
-            intel-media-va-driver-non-free libmfx-gen1 libvpl2 \
-            libegl-mesa0 libegl1-mesa libegl1-mesa-dev libgbm1 libgl1-mesa-dev libgl1-mesa-dri \
-            libglapi-mesa libglx-mesa0 libigdgmm12 libxatracker2 mesa-va-drivers \
-            mesa-vdpau-drivers mesa-vulkan-drivers va-driver-all vainfo hwinfo clinfo intel-ocloc
-        apt-get install -y libigc-dev intel-igc-cm libigdfcl-dev libigfxcmrt-dev libze-dev
-    fi
-
-    # Install Intel Support Packages
-    apt-get install -y ${XPU_PACKAGES}
-
-    # Cleanup
-    apt-get autoclean && apt-get clean
-    rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-}
-
-# Install the Intel GPU driver packages, xpu-smi and the Intel support
-# packages on RHEL-family systems. On rhel only the listed VERSION_ID values
-# are accepted; on almalinux VERSION_ID is forced to 8.8.
-# Reads XPU_DRIVER_VERSION (repository suffix) and XPU_PACKAGES.
-function install_rhel() {
-    . /etc/os-release
-    if [[ "${ID}" == "rhel" ]]; then
-        if [[ ! " 8.8 8.9 9.0 9.2 9.3 " =~ " ${VERSION_ID} " ]]; then
-            echo "RHEL version ${VERSION_ID} not supported"
-            # A bare `exit` would return echo's status (0) and hide the failure
-            exit 1
-        fi
-    elif [[ "${ID}" == "almalinux" ]]; then
-        # Workaround for almalinux8 which used by quay.io/pypa/manylinux_2_28_x86_64
-        VERSION_ID="8.8"
-    fi
-
-    dnf install -y 'dnf-command(config-manager)'
-    # To add the online network package repository for the GPU Driver
-    dnf config-manager --add-repo \
-        https://repositories.intel.com/gpu/rhel/${VERSION_ID}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_ID}.repo
-    # To add the online network package repository for the Intel Support Packages
-    tee > /etc/yum.repos.d/oneAPI.repo << EOF
-[oneAPI]
-name=Intel for Pytorch GPU dev repository
-baseurl=https://yum.repos.intel.com/oneapi
-enabled=1
-gpgcheck=1
-repo_gpgcheck=1
-gpgkey=https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-EOF
-
-    # Install Intel Support Packages
-    yum install -y ${XPU_PACKAGES}
-    # The xpu-smi packages
-    dnf install -y xpu-smi
-    # Compute and Media Runtimes
-    dnf install --skip-broken -y \
-        intel-opencl intel-media intel-mediasdk libmfxgen1 libvpl2 \
-        level-zero intel-level-zero-gpu mesa-dri-drivers mesa-vulkan-drivers \
-        mesa-vdpau-drivers libdrm mesa-libEGL mesa-libgbm mesa-libGL \
-        mesa-libxatracker libvpl-tools intel-metrics-discovery \
-        intel-metrics-library intel-igc-core intel-igc-cm \
-        libva libva-utils intel-gmmlib libmetee intel-gsc intel-ocloc
-    # Development packages
-    dnf install -y --refresh \
-        intel-igc-opencl-devel level-zero-devel intel-gsc-devel libmetee-devel
-
-    # Cleanup
-    dnf clean all
-    rm -rf /var/cache/yum
-    rm -rf /var/lib/yum/yumdb
-    rm -rf /var/lib/yum/history
-}
-
-# Install the Intel GPU driver packages, xpu-smi and the Intel support
-# packages on SLES. VERSION_ID is rewritten with "sp" in place of the dot
-# (e.g. 15.4 -> 15sp4) and only 15sp4 / 15sp5 are accepted.
-# Reads XPU_DRIVER_VERSION (repository suffix) and XPU_PACKAGES.
-function install_sles() {
-    . /etc/os-release
-    VERSION_SP=${VERSION_ID//./sp}
-    if [[ ! " 15sp4 15sp5 " =~ " ${VERSION_SP} " ]]; then
-        echo "SLES version ${VERSION_ID} not supported"
-        # A bare `exit` would return echo's status (0) and hide the failure
-        exit 1
-    fi
-
-    # To add the online network package repository for the GPU Driver
-    zypper addrepo -f -r \
-        https://repositories.intel.com/gpu/sles/${VERSION_SP}${XPU_DRIVER_VERSION}/unified/intel-gpu-${VERSION_SP}.repo
-    rpm --import https://repositories.intel.com/gpu/intel-graphics.key
-    # To add the online network package repository for the Intel Support Packages
-    zypper addrepo https://yum.repos.intel.com/oneapi oneAPI
-    rpm --import https://yum.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
-
-    # The xpu-smi packages
-    zypper install -y lsb-release flex bison xpu-smi
-    # Compute and Media Runtimes
-    zypper install -y intel-level-zero-gpu level-zero intel-gsc intel-opencl intel-ocloc \
-        intel-media-driver libigfxcmrt7 libvpl2 libvpl-tools libmfxgen1 libmfx1
-    # Development packages
-    zypper install -y libigdfcl-devel intel-igc-cm libigfxcmrt-devel level-zero-devel
-
-    # Install Intel Support Packages
-    zypper install -y ${XPU_PACKAGES}
-
-}
-
-# Default use GPU driver rolling releases
-XPU_DRIVER_VERSION=""
-if [[ "${XPU_DRIVER_TYPE,,}" == "lts" ]]; then
-    # Use GPU driver LTS releases
-    XPU_DRIVER_VERSION="/lts/2350"
-fi
-
-# Default use Intel® oneAPI Deep Learning Essentials 2025.1
-if [[ "$XPU_VERSION" == "2025.2" ]]; then
-    XPU_PACKAGES="intel-deep-learning-essentials-2025.2"
-else
-    XPU_PACKAGES="intel-deep-learning-essentials-2025.1"
-fi
-
-# The installation depends on the base OS
-ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
-case "$ID" in
-    ubuntu)
-        install_ubuntu
-    ;;
-    rhel|almalinux)
-        install_rhel
-    ;;
-    sles)
-        install_sles
-    ;;
-    *)
-        echo "Unable to determine OS..."
-        exit 1
-    ;;
-esac
--- a/.ci/docker/libtorch/Dockerfile
+++ b/.ci/docker/libtorch/Dockerfile
@ -1,117 +0,0 @@
-ARG BASE_TARGET=base
-ARG GPU_IMAGE=ubuntu:20.04
-FROM ${GPU_IMAGE} as base
-
-ENV DEBIAN_FRONTEND=noninteractive
-
-RUN apt-get clean && apt-get update
-RUN apt-get install -y curl locales g++ git-all autoconf automake make cmake wget unzip sudo
-# Just add everything as a safe.directory for git since these will be used in multiple places with git
-RUN git config --global --add safe.directory '*'
-
-RUN locale-gen en_US.UTF-8
-
-ENV LC_ALL en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
-
-# Install openssl
-FROM base as openssl
-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh && rm install_openssl.sh
-
-# Install python
-FROM base as python
-ADD common/install_cpython.sh install_cpython.sh
-RUN apt-get update -y && \
-    apt-get install build-essential gdb lcov libbz2-dev libffi-dev \
-        libgdbm-dev liblzma-dev libncurses5-dev libreadline6-dev \
-        libsqlite3-dev libssl-dev lzma lzma-dev tk-dev uuid-dev zlib1g-dev -y && \
-    bash ./install_cpython.sh && \
-    rm install_cpython.sh && \
-    apt-get clean
-
-FROM base as conda
-ADD ./common/install_conda_docker.sh install_conda.sh
-RUN bash ./install_conda.sh && rm install_conda.sh
-
-FROM base as cpu
-# Install Anaconda
-COPY --from=conda /opt/conda /opt/conda
-# Install python
-COPY --from=python /opt/python    /opt/python
-COPY --from=python /opt/_internal /opt/_internal
-ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
-# Install MKL
-ADD ./common/install_mkl.sh install_mkl.sh
-RUN bash ./install_mkl.sh && rm install_mkl.sh
-
-FROM cpu as cuda
-ADD ./common/install_cuda.sh install_cuda.sh
-ADD ./common/install_magma.sh install_magma.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-ENV CUDA_HOME /usr/local/cuda
-
-FROM cuda as cuda12.6
-RUN bash ./install_cuda.sh 12.6
-RUN bash ./install_magma.sh 12.6
-RUN ln -sf /usr/local/cuda-12.6 /usr/local/cuda
-
-FROM cuda as cuda12.8
-RUN bash ./install_cuda.sh 12.8
-RUN bash ./install_magma.sh 12.8
-RUN ln -sf /usr/local/cuda-12.8 /usr/local/cuda
-
-FROM cuda as cuda12.9
-RUN bash ./install_cuda.sh 12.9
-RUN bash ./install_magma.sh 12.9
-RUN ln -sf /usr/local/cuda-12.9 /usr/local/cuda
-
-FROM cuda as cuda13.0
-RUN bash ./install_cuda.sh 13.0
-RUN bash ./install_magma.sh 13.0
-RUN ln -sf /usr/local/cuda-13.0 /usr/local/cuda
-
-# Install libibverbs for libtorch and copy to CUDA directory
-RUN apt-get update -y && \
-    apt-get install -y libibverbs-dev librdmacm-dev && \
-    cp /usr/lib/x86_64-linux-gnu/libmlx5.so* /usr/local/cuda/lib64/ && \
-    cp /usr/lib/x86_64-linux-gnu/librdmacm.so* /usr/local/cuda/lib64/ && \
-    cp /usr/lib/x86_64-linux-gnu/libibverbs.so* /usr/local/cuda/lib64/ && \
-    cp /usr/lib/x86_64-linux-gnu/libnl* /usr/local/cuda/lib64/
-
-FROM cpu as rocm
-ARG ROCM_VERSION
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-ENV MKLROOT /opt/intel
-# Adding ROCM_PATH env var so that LoadHip.cmake (even with logic updated for ROCm6.0)
-# find HIP works for ROCm5.7. Not needed for ROCm6.0 and above.
-# Remove below when ROCm5.7 is not in support matrix anymore.
-ENV ROCM_PATH /opt/rocm
-# No need to install ROCm as base docker image should have full ROCm install
-#ADD ./common/install_rocm.sh install_rocm.sh
-ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
-ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
-# gfortran and python needed for building magma from source for ROCm
-RUN apt-get update -y && \
-    apt-get install gfortran -y && \
-    apt-get install python3 python-is-python3 -y && \
-    apt-get clean
-
-RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
-
-FROM ${BASE_TARGET} as final
-COPY --from=openssl            /opt/openssl           /opt/openssl
-# Install patchelf
-ADD ./common/install_patchelf.sh install_patchelf.sh
-RUN bash ./install_patchelf.sh && rm install_patchelf.sh
-# Install Anaconda
-COPY --from=conda /opt/conda /opt/conda
-# Install python
-COPY --from=python /opt/python    /opt/python
-COPY --from=python /opt/_internal /opt/_internal
-ENV PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH
--- a/.ci/docker/libtorch/build.sh
+++ b/.ci/docker/libtorch/build.sh
@ -1,67 +0,0 @@
-#!/usr/bin/env bash
-# Script used only in CD pipeline
-
-set -eoux pipefail
-
-image="$1"
-shift
-
-if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGENAME:ARCHTAG"
-  exit 1
-fi
-
-TOPDIR=$(git rev-parse --show-toplevel)
-
-DOCKER=${DOCKER:-docker}
-
-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-GPU_ARCH_VERSION=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
-    # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
-fi
-
-# Map the image tag onto the Dockerfile target (BASE_TARGET), the base image
-# (GPU_IMAGE) and any extra --build-arg flags (DOCKER_GPU_BUILD_ARG).
-case ${DOCKER_TAG_PREFIX} in
-    cpu)
-        BASE_TARGET=cpu
-        GPU_IMAGE=ubuntu:20.04
-        DOCKER_GPU_BUILD_ARG=""
-        ;;
-    cuda*)
-        BASE_TARGET=cuda${GPU_ARCH_VERSION}
-        GPU_IMAGE=ubuntu:20.04
-        DOCKER_GPU_BUILD_ARG=""
-        ;;
-    rocm*)
-        # we want the patch version of 6.4 instead
-        # Plain string comparison: this script defines no `ver` helper, and an
-        # undefined helper makes `[[ "" -eq "" ]]` true for every version.
-        if [[ "${GPU_ARCH_VERSION}" == "6.4" ]]; then
-            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
-        fi
-        BASE_TARGET=rocm
-        GPU_IMAGE=rocm/dev-ubuntu-22.04:${GPU_ARCH_VERSION}-complete
-        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
-        DOCKER_GPU_BUILD_ARG="--build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg ROCM_VERSION=${GPU_ARCH_VERSION}"
-        ;;
-    *)
-        echo "ERROR: Unrecognized DOCKER_TAG_PREFIX: ${DOCKER_TAG_PREFIX}"
-        exit 1
-        ;;
-esac
-
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-# Build the `final` stage of the libtorch Dockerfile under a throwaway tag.
-# DOCKER_GPU_BUILD_ARG stays unquoted on purpose: it holds several
-# space-separated --build-arg words that must be split. "$@" forwards the
-# caller's remaining arguments without re-splitting them.
-DOCKER_BUILDKIT=1 ${DOCKER} build \
-    --target final \
-    ${DOCKER_GPU_BUILD_ARG} \
-    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
-    --build-arg "BASE_TARGET=${BASE_TARGET}" \
-    -t "${tmp_tag}" \
-    "$@" \
-    -f "${TOPDIR}/.ci/docker/libtorch/Dockerfile" \
-    "${TOPDIR}/.ci/docker/"
--- a/.ci/docker/linter-cuda/Dockerfile
+++ b/.ci/docker/linter-cuda/Dockerfile
@ -1,48 +0,0 @@
-ARG UBUNTU_VERSION
-
-FROM ubuntu:${UBUNTU_VERSION}
-
-ARG UBUNTU_VERSION
-
-ENV DEBIAN_FRONTEND noninteractive
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install missing libomp-dev
-RUN apt-get update && apt-get install -y --no-install-recommends libomp-dev && apt-get autoclean && apt-get clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG PYTHON_VERSION
-ARG PIP_CMAKE
-# Put venv into the env vars so users don't need to activate it
-ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
-ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
-COPY requirements-ci.txt /opt/requirements-ci.txt
-COPY ./common/install_python.sh install_python.sh
-RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
-
-# Install cuda and cudnn
-ARG CUDA_VERSION
-COPY ./common/install_cuda.sh install_cuda.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
-ENV DESIRED_CUDA ${CUDA_VERSION}
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
-
-# Note that Docker build forbids copying file outside the build context
-COPY ./common/install_linter.sh install_linter.sh
-RUN bash ./install_linter.sh
-RUN rm install_linter.sh
-
-RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/linter/Dockerfile
+++ b/.ci/docker/linter/Dockerfile
@ -1,33 +0,0 @@
-ARG UBUNTU_VERSION
-
-FROM ubuntu:${UBUNTU_VERSION}
-
-ARG UBUNTU_VERSION
-
-ENV DEBIAN_FRONTEND noninteractive
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG PYTHON_VERSION
-ENV PATH /var/lib/jenkins/ci_env/bin:$PATH
-ENV VIRTUAL_ENV /var/lib/jenkins/ci_env
-COPY requirements-ci.txt /opt/requirements-ci.txt
-COPY ./common/install_python.sh install_python.sh
-RUN bash ./install_python.sh && rm install_python.sh /opt/requirements-ci.txt
-
-# Note that Docker build forbids copying file outside the build context
-COPY ./common/install_linter.sh install_linter.sh
-RUN bash ./install_linter.sh
-RUN rm install_linter.sh
-
-RUN chown -R jenkins:jenkins /var/lib/jenkins/ci_env
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/manywheel/Dockerfile_2_28
+++ b/.ci/docker/manywheel/Dockerfile_2_28
@ -1,180 +0,0 @@
-# syntax = docker/dockerfile:experimental
-ARG BASE_CUDA_VERSION=11.8
-ARG GPU_IMAGE=amd64/almalinux:8
-FROM quay.io/pypa/manylinux_2_28_x86_64 as base
-
-ENV LC_ALL en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
-
-ARG DEVTOOLSET_VERSION=13
-RUN yum install -y sudo wget curl perl util-linux xz bzip2 git patch which perl zlib-devel yum-utils gcc-toolset-${DEVTOOLSET_VERSION}-gcc gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran gcc-toolset-${DEVTOOLSET_VERSION}-gdb
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-
-# cmake-3.18.4 from pip
-RUN yum install -y python3-pip && \
-    python3 -mpip install cmake==3.18.4 && \
-    ln -s /usr/local/bin/cmake /usr/bin/cmake3
-
-FROM base as openssl
-# Install openssl (this must precede `build python` step)
-# (In order to have a proper SSL module, Python is compiled
-# against a recent openssl [see env vars above], which is linked
-# statically. We delete openssl afterwards.)
-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh && rm install_openssl.sh
-
-
-# remove unnecessary python versions
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
-RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
-RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
-RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
-
-FROM base as cuda
-ARG BASE_CUDA_VERSION=12.6
-# Install CUDA
-ADD ./common/install_cuda.sh install_cuda.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
-
-FROM base as intel
-# MKL
-ADD ./common/install_mkl.sh install_mkl.sh
-RUN bash ./install_mkl.sh && rm install_mkl.sh
-
-FROM base as magma
-ARG BASE_CUDA_VERSION=12.6
-# Install magma
-ADD ./common/install_magma.sh install_magma.sh
-RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
-
-FROM base as jni
-# Install java jni header
-ADD ./common/install_jni.sh install_jni.sh
-ADD ./java/jni.h jni.h
-RUN bash ./install_jni.sh && rm install_jni.sh
-
-FROM base as libpng
-# Install libpng
-ADD ./common/install_libpng.sh install_libpng.sh
-RUN bash ./install_libpng.sh && rm install_libpng.sh
-
-FROM ${GPU_IMAGE} as common
-ARG DEVTOOLSET_VERSION=13
-ENV LC_ALL en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
-RUN yum -y install epel-release
-RUN yum -y update
-RUN yum install -y \
-        autoconf \
-        automake \
-        bison \
-        bzip2 \
-        curl \
-        diffutils \
-        file \
-        git \
-        make \
-        patch \
-        perl \
-        unzip \
-        util-linux \
-        wget \
-        which \
-        xz \
-        glibc-langpack-en \
-        gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
-        gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
-        gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
-        gcc-toolset-${DEVTOOLSET_VERSION}-gdb
-
-# git236+ would refuse to run git commands in repos owned by other users
-# Which causes version check to fail, as pytorch repo is bind-mounted into the image
-# Override this behaviour by treating every folder as safe
-# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
-RUN git config --global --add safe.directory "*"
-
-ENV SSL_CERT_FILE=/opt/_internal/certs.pem
-# Copy prebuilt artifacts from the earlier build stages
-COPY --from=openssl            /opt/openssl                          /opt/openssl
-COPY --from=base               /opt/python                           /opt/python
-COPY --from=base               /usr/local/lib/                       /usr/local/lib/
-COPY --from=base               /opt/_internal                        /opt/_internal
-COPY --from=base               /usr/local/bin/auditwheel             /usr/local/bin/auditwheel
-COPY --from=intel              /opt/intel                            /opt/intel
-COPY --from=base               /usr/local/bin/patchelf               /usr/local/bin/patchelf
-COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/
-COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/
-COPY --from=libpng             /usr/local/include/png*               /usr/local/include/
-COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/
-COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/
-COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig
-COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h
-
-FROM common as cpu_final
-ARG BASE_CUDA_VERSION=12.6
-ARG DEVTOOLSET_VERSION=13
-# Install Anaconda
-ADD ./common/install_conda_docker.sh install_conda.sh
-RUN bash ./install_conda.sh && rm install_conda.sh
-ENV PATH /opt/conda/bin:$PATH
-# Ensure the expected devtoolset is used
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-# Install setuptools and wheel for python 3.12/3.13
-RUN for cpython_version in "cp312-cp312" "cp313-cp313" "cp313-cp313t"; do \
-    /opt/python/${cpython_version}/bin/python -m pip install setuptools wheel; \
-    done;
-
-
-# cmake-3.18.4 from pip; force in case cmake3 already exists
-RUN yum install -y python3-pip && \
-    python3 -mpip install cmake==3.18.4 && \
-    ln -sf /usr/local/bin/cmake /usr/bin/cmake3
-
-FROM cpu_final as cuda_final
-RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
-COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
-COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
-RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
-ENV PATH=/usr/local/cuda/bin:$PATH
-
-FROM cpu_final as rocm_final
-ARG ROCM_VERSION=6.0
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-ARG DEVTOOLSET_VERSION=11
-ENV LDFLAGS="-Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64 -Wl,-rpath=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib"
-# Somewhere in ROCm stack, we still use non-existing /opt/rocm/hip path,
-# below workaround helps avoid error
-ENV ROCM_PATH /opt/rocm
-# cmake-3.28.4 from pip to get enable_language(HIP)
-# and avoid 3.21.0 cmake+ninja issues with ninja inserting "-Wl,--no-as-needed" in LINK_FLAGS for static linker
-RUN python3 -m pip install --upgrade pip && \
-    python3 -mpip install cmake==3.28.4
-# replace the libdrm in /opt/amdgpu with custom amdgpu.ids lookup path
-ADD ./common/install_rocm_drm.sh install_rocm_drm.sh
-RUN bash ./install_rocm_drm.sh && rm install_rocm_drm.sh
-# ROCm 6.4 rocm-smi depends on system drm.h header
-RUN yum install -y libdrm-devel
-ENV MKLROOT /opt/intel
-ADD ./common/install_rocm_magma.sh install_rocm_magma.sh
-RUN bash ./install_rocm_magma.sh ${ROCM_VERSION} && rm install_rocm_magma.sh
-ADD ./common/install_miopen.sh install_miopen.sh
-RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
-
-FROM cpu_final as xpu_final
-# XPU CD use rolling driver
-ENV XPU_DRIVER_TYPE ROLLING
-# cmake-3.28.4 from pip
-RUN python3 -m pip install --upgrade pip && \
-    python3 -mpip install cmake==3.28.4
-ADD ./common/install_xpu.sh install_xpu.sh
-ENV XPU_VERSION 2025.2
-RUN bash ./install_xpu.sh && rm install_xpu.sh
-RUN pushd /opt/_internal && tar -xJf static-libs-for-embedding-only.tar.xz && popd
--- a/.ci/docker/manywheel/Dockerfile_2_28_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_2_28_aarch64
@ -1,73 +0,0 @@
-FROM quay.io/pypa/manylinux_2_28_aarch64 as base
-
-ARG GCCTOOLSET_VERSION=13
-
-# Language variables
-ENV LC_ALL=en_US.UTF-8
-ENV LANG=en_US.UTF-8
-ENV LANGUAGE=en_US.UTF-8
-
-# Installed needed OS packages. This is to support all
-# the binary builds (torch, vision, audio, text, data)
-RUN yum -y install epel-release
-RUN yum -y update
-RUN yum install -y \
-  autoconf \
-  automake \
-  bison \
-  bzip2 \
-  curl \
-  diffutils \
-  file \
-  git \
-  less \
-  libffi-devel \
-  libgomp \
-  make \
-  openssl-devel \
-  patch \
-  perl \
-  unzip \
-  util-linux \
-  wget \
-  which \
-  xz \
-  yasm \
-  zstd \
-  sudo \
-  gcc-toolset-${GCCTOOLSET_VERSION}-gcc \
-  gcc-toolset-${GCCTOOLSET_VERSION}-gcc-c++ \
-  gcc-toolset-${GCCTOOLSET_VERSION}-gcc-gfortran \
-  gcc-toolset-${GCCTOOLSET_VERSION}-gdb
-
-# (optional) Install non-default Ninja version
-ARG NINJA_VERSION
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
-# Ensure the expected devtoolset is used
-ENV PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${GCCTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-
-# git236+ would refuse to run git commands in repos owned by other users
-# Which causes version check to fail, as pytorch repo is bind-mounted into the image
-# Override this behaviour by treating every folder as safe
-# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
-RUN git config --global --add safe.directory "*"
-
-FROM base as openblas
-# Install openblas
-ARG OPENBLAS_VERSION
-ADD ./common/install_openblas.sh install_openblas.sh
-RUN bash ./install_openblas.sh && rm install_openblas.sh
-
-FROM base as final
-
-# remove unnecessary python versions
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
-RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
-RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
-RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
-COPY --from=openblas     /opt/OpenBLAS/  /opt/OpenBLAS/
-ENV LD_LIBRARY_PATH=/opt/OpenBLAS/lib:$LD_LIBRARY_PATH
--- a/.ci/docker/manywheel/Dockerfile_cuda_aarch64
+++ b/.ci/docker/manywheel/Dockerfile_cuda_aarch64
@ -1,97 +0,0 @@
-FROM quay.io/pypa/manylinux_2_28_aarch64 as base
-
-# Cuda ARM build needs gcc 11
-ARG DEVTOOLSET_VERSION=13
-
-# Language variables
-ENV LC_ALL=en_US.UTF-8
-ENV LANG=en_US.UTF-8
-ENV LANGUAGE=en_US.UTF-8
-
-# Installed needed OS packages. This is to support all
-# the binary builds (torch, vision, audio, text, data)
-RUN yum -y install epel-release
-RUN yum -y update
-RUN yum install -y \
-  autoconf \
-  automake \
-  bison \
-  bzip2 \
-  curl \
-  diffutils \
-  file \
-  git \
-  make \
-  patch \
-  perl \
-  unzip \
-  util-linux \
-  wget \
-  which \
-  xz \
-  yasm \
-  less \
-  zstd \
-  libgomp \
-  sudo \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gdb
-
-# Ensure the expected devtoolset is used
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-
-# git236+ would refuse to run git commands in repos owned by other users
-# Which causes version check to fail, as pytorch repo is bind-mounted into the image
-# Override this behaviour by treating every folder as safe
-# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
-RUN git config --global --add safe.directory "*"
-
-
-FROM base as openssl
-# Install openssl (this must precede `build python` step)
-# (In order to have a proper SSL module, Python is compiled
-# against a recent openssl [see env vars above], which is linked
-# statically. We delete openssl afterwards.)
-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh && rm install_openssl.sh
-ENV SSL_CERT_FILE=/opt/_internal/certs.pem
-
-FROM openssl as final
-# remove unnecessary python versions
-RUN rm -rf /opt/python/cp26-cp26m /opt/_internal/cpython-2.6.9-ucs2
-RUN rm -rf /opt/python/cp26-cp26mu /opt/_internal/cpython-2.6.9-ucs4
-RUN rm -rf /opt/python/cp33-cp33m /opt/_internal/cpython-3.3.6
-RUN rm -rf /opt/python/cp34-cp34m /opt/_internal/cpython-3.4.6
-
-FROM base as cuda
-ARG BASE_CUDA_VERSION
-# Install CUDA
-ADD ./common/install_cuda.sh install_cuda.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
-
-FROM base as magma
-ARG BASE_CUDA_VERSION
-# Install magma
-ADD ./common/install_magma.sh install_magma.sh
-RUN bash ./install_magma.sh ${BASE_CUDA_VERSION} && rm install_magma.sh
-
-FROM base as nvpl
-# Install nvpl
-ADD ./common/install_nvpl.sh install_nvpl.sh
-RUN bash ./install_nvpl.sh && rm install_nvpl.sh
-
-FROM final as cuda_final
-ARG BASE_CUDA_VERSION
-RUN rm -rf /usr/local/cuda-${BASE_CUDA_VERSION}
-COPY --from=cuda     /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
-COPY --from=magma    /usr/local/cuda-${BASE_CUDA_VERSION}  /usr/local/cuda-${BASE_CUDA_VERSION}
-COPY --from=nvpl /opt/nvpl/lib/  /usr/local/lib/
-COPY --from=nvpl /opt/nvpl/include/  /usr/local/include/
-RUN ln -sf /usr/local/cuda-${BASE_CUDA_VERSION} /usr/local/cuda
-ENV PATH=/usr/local/cuda/bin:$PATH
--- a/.ci/docker/manywheel/Dockerfile_cxx11-abi
+++ b/.ci/docker/manywheel/Dockerfile_cxx11-abi
@ -1,71 +0,0 @@
-FROM centos:8 as base
-
-ENV LC_ALL en_US.UTF-8
-ENV LANG en_US.UTF-8
-ENV LANGUAGE en_US.UTF-8
-ENV PATH /opt/rh/gcc-toolset-11/root/bin/:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
-
-# change to a valid repo
-RUN sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-Linux-*.repo
-# enable to install ninja-build
-RUN sed -i 's|enabled=0|enabled=1|g' /etc/yum.repos.d/CentOS-Linux-PowerTools.repo
-
-RUN yum -y update
-RUN yum install -y wget curl perl util-linux xz bzip2 git patch which zlib-devel sudo
-RUN yum install -y autoconf automake make cmake gdb gcc-toolset-11-gcc-c++
-
-
-FROM base as openssl
-ADD ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh && rm install_openssl.sh
-
-# Install python
-FROM base as python
-RUN yum install -y openssl-devel zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel
-ADD common/install_cpython.sh install_cpython.sh
-RUN bash ./install_cpython.sh && rm install_cpython.sh
-
-FROM base as conda
-ADD ./common/install_conda_docker.sh install_conda.sh
-RUN bash ./install_conda.sh && rm install_conda.sh
-RUN /opt/conda/bin/conda install -y cmake
-
-FROM base as intel
-# Install MKL
-COPY --from=python             /opt/python                           /opt/python
-COPY --from=python             /opt/_internal                        /opt/_internal
-COPY --from=conda              /opt/conda                            /opt/conda
-ENV PATH=/opt/conda/bin:$PATH
-ADD ./common/install_mkl.sh install_mkl.sh
-RUN bash ./install_mkl.sh && rm install_mkl.sh
-
-FROM base as patchelf
-ADD ./common/install_patchelf.sh install_patchelf.sh
-RUN bash ./install_patchelf.sh && rm install_patchelf.sh
-RUN cp $(which patchelf) /patchelf
-
-FROM base as jni
-ADD ./common/install_jni.sh install_jni.sh
-ADD ./java/jni.h jni.h
-RUN bash ./install_jni.sh && rm install_jni.sh
-
-FROM base as libpng
-ADD ./common/install_libpng.sh install_libpng.sh
-RUN bash ./install_libpng.sh && rm install_libpng.sh
-
-FROM base as final
-COPY --from=openssl            /opt/openssl                          /opt/openssl
-COPY --from=python             /opt/python                           /opt/python
-COPY --from=python             /opt/_internal                        /opt/_internal
-COPY --from=intel              /opt/intel                            /opt/intel
-COPY --from=conda              /opt/conda                            /opt/conda
-COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf
-COPY --from=jni                /usr/local/include/jni.h              /usr/local/include/jni.h
-COPY --from=libpng             /usr/local/bin/png*                   /usr/local/bin/
-COPY --from=libpng             /usr/local/bin/libpng*                /usr/local/bin/
-COPY --from=libpng             /usr/local/include/png*               /usr/local/include/
-COPY --from=libpng             /usr/local/include/libpng*            /usr/local/include/
-COPY --from=libpng             /usr/local/lib/libpng*                /usr/local/lib/
-COPY --from=libpng             /usr/local/lib/pkgconfig              /usr/local/lib/pkgconfig
-
-RUN yum install -y ninja-build
--- a/.ci/docker/manywheel/Dockerfile_s390x
+++ b/.ci/docker/manywheel/Dockerfile_s390x
@ -1,141 +0,0 @@
-FROM quay.io/pypa/manylinux_2_28_s390x as base
-
-# Language variables
-ENV LC_ALL=C.UTF-8
-ENV LANG=C.UTF-8
-ENV LANGUAGE=C.UTF-8
-
-# there is a bugfix in gcc >= 14 for precompiled headers and s390x vectorization interaction.
-# with earlier gcc versions test/inductor/test_cpu_cpp_wrapper.py will fail.
-ARG DEVTOOLSET_VERSION=14
-# Installed needed OS packages. This is to support all
-# the binary builds (torch, vision, audio, text, data)
-RUN yum -y install epel-release
-RUN yum -y update
-RUN yum install -y \
-  sudo \
-  autoconf \
-  automake \
-  bison \
-  bzip2 \
-  curl \
-  diffutils \
-  file \
-  git \
-  make \
-  patch \
-  perl \
-  unzip \
-  util-linux \
-  wget \
-  which \
-  xz \
-  yasm \
-  less \
-  zstd \
-  libgomp \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-c++ \
-  gcc-toolset-${DEVTOOLSET_VERSION}-binutils \
-  gcc-toolset-${DEVTOOLSET_VERSION}-gcc-gfortran \
-  cmake \
-  rust \
-  cargo \
-  llvm-devel \
-  libzstd-devel \
-  python3.12-devel \
-  python3.12-test \
-  python3.12-setuptools \
-  python3.12-pip \
-  python3-virtualenv \
-  python3.12-pyyaml \
-  python3.12-numpy \
-  python3.12-wheel \
-  python3.12-cryptography \
-  blas-devel \
-  openblas-devel \
-  lapack-devel \
-  atlas-devel \
-  libjpeg-devel \
-  libxslt-devel \
-  libxml2-devel \
-  openssl-devel \
-  valgrind \
-  ninja-build
-
-ENV PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/bin:$PATH
-ENV LD_LIBRARY_PATH=/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib64:/opt/rh/gcc-toolset-${DEVTOOLSET_VERSION}/root/usr/lib:$LD_LIBRARY_PATH
-
-# git236+ would refuse to run git commands in repos owned by other users
-# Which causes version check to fail, as pytorch repo is bind-mounted into the image
-# Override this behaviour by treating every folder as safe
-# For more details see https://github.com/pytorch/pytorch/issues/78659#issuecomment-1144107327
-RUN git config --global --add safe.directory "*"
-
-# installed python doesn't have development parts. Rebuild it from scratch
-RUN /bin/rm -rf /opt/_internal /opt/python /usr/local/*/*
-
-# EPEL for cmake
-FROM base as patchelf
-# Install patchelf
-ADD ./common/install_patchelf.sh install_patchelf.sh
-RUN bash ./install_patchelf.sh && rm install_patchelf.sh
-RUN cp $(which patchelf) /patchelf
-
-FROM patchelf as python
-# build python
-COPY manywheel/build_scripts /build_scripts
-ADD ./common/install_cpython.sh /build_scripts/install_cpython.sh
-ENV SSL_CERT_FILE=
-RUN bash build_scripts/build.sh && rm -r build_scripts
-
-FROM base as final
-COPY --from=python             /opt/python                           /opt/python
-COPY --from=python             /opt/_internal                        /opt/_internal
-COPY --from=python             /opt/python/cp39-cp39/bin/auditwheel  /usr/local/bin/auditwheel
-COPY --from=patchelf           /usr/local/bin/patchelf               /usr/local/bin/patchelf
-
-RUN alternatives --set python /usr/bin/python3.12
-RUN alternatives --set python3 /usr/bin/python3.12
-
-RUN pip-3.12 install typing_extensions
-
-ENTRYPOINT []
-CMD ["/bin/bash"]
-
-# install test dependencies:
-# - grpcio requires system openssl, bundled crypto fails to build
-RUN dnf install -y \
-  hdf5-devel \
-  python3-h5py \
-  git
-
-RUN env GRPC_PYTHON_BUILD_SYSTEM_OPENSSL=True pip3 install grpcio
-
-# cmake-3.28.0 from pip for onnxruntime
-RUN python3 -mpip install cmake==3.28.0
-
-# build onnxruntime 1.21.0 from sources.
-# it is not possible to build it from sources using pip,
-# so just build it from upstream repository.
-# h5py is dependency of onnxruntime_training.
-# h5py==3.11.0 builds with hdf5-devel 1.10.5 from repository.
-# h5py 3.11.0 doesn't build with numpy >= 2.3.0.
-# install newest flatbuffers version first:
-# for some reason old version is getting pulled in otherwise.
-# packaging package is required for onnxruntime wheel build.
-RUN pip3 install flatbuffers && \
-  pip3 install cython 'pkgconfig>=1.5.5' 'setuptools>=77' 'numpy<2.3.0' && \
-  pip3 install --no-build-isolation h5py==3.11.0 && \
-  pip3 install packaging && \
-  git clone https://github.com/microsoft/onnxruntime && \
-  cd onnxruntime && git checkout v1.21.0 && \
-  git submodule update --init --recursive && \
-  wget https://github.com/microsoft/onnxruntime/commit/f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
-  patch -p1 < f57db79743c4d1a3553aa05cf95bcd10966030e6.patch && \
-  ./build.sh --config Release --parallel 0 --enable_pybind \
-  --build_wheel --enable_training --enable_training_apis \
-  --enable_training_ops --skip_tests --allow_running_as_root \
-  --compile_no_warning_as_error && \
-  pip3 install ./build/Linux/Release/dist/onnxruntime_training-*.whl && \
-  cd .. && /bin/rm -rf ./onnxruntime
--- a/.ci/docker/manywheel/build.sh
+++ b/.ci/docker/manywheel/build.sh
@ -1,129 +0,0 @@
-#!/usr/bin/env bash
-# Script used only in CD pipeline
-
-set -exou pipefail
-
-TOPDIR=$(git rev-parse --show-toplevel)
-
-image="$1"
-shift
-
-if [ -z "${image}" ]; then
-  echo "Usage: $0 IMAGE:ARCHTAG"
-  exit 1
-fi
-
-# Go from imagename:tag to tag
-DOCKER_TAG_PREFIX=$(echo "${image}" | awk -F':' '{print $2}')
-
-GPU_ARCH_VERSION=""
-if [[ "${DOCKER_TAG_PREFIX}" == cuda* ]]; then
-    # extract cuda version from image name.  e.g. manylinux2_28-builder:cuda12.8 returns 12.8
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'cuda' '{print $2}')
-elif [[ "${DOCKER_TAG_PREFIX}" == rocm* ]]; then
-    # extract rocm version from image name.  e.g. manylinux2_28-builder:rocm6.2.4 returns 6.2.4
-    GPU_ARCH_VERSION=$(echo "${DOCKER_TAG_PREFIX}" | awk -F'rocm' '{print $2}')
-fi
-
-MANY_LINUX_VERSION=${MANY_LINUX_VERSION:-}
-DOCKERFILE_SUFFIX=${DOCKERFILE_SUFFIX:-}
-OPENBLAS_VERSION=${OPENBLAS_VERSION:-}
-
-case ${image} in
-    manylinux2_28-builder:cpu)
-        TARGET=cpu_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13"
-        MANY_LINUX_VERSION="2_28"
-        ;;
-    manylinux2_28_aarch64-builder:cpu-aarch64)
-        TARGET=final
-        GPU_IMAGE=arm64v8/almalinux:8
-        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=13 --build-arg NINJA_VERSION=1.12.1"
-        MANY_LINUX_VERSION="2_28_aarch64"
-        OPENBLAS_VERSION="v0.3.30"
-        ;;
-    manylinuxcxx11-abi-builder:cpu-cxx11-abi)
-        TARGET=final
-        GPU_IMAGE=""
-        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=9"
-        MANY_LINUX_VERSION="cxx11-abi"
-        ;;
-    manylinuxs390x-builder:cpu-s390x)
-        TARGET=final
-        GPU_IMAGE=s390x/almalinux:8
-        DOCKER_GPU_BUILD_ARG=""
-        MANY_LINUX_VERSION="s390x"
-        ;;
-    manylinux2_28-builder:cuda11*)
-        TARGET=cuda_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=11"
-        MANY_LINUX_VERSION="2_28"
-        ;;
-    manylinux2_28-builder:cuda12*)
-        TARGET=cuda_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
-        MANY_LINUX_VERSION="2_28"
-        ;;
-    manylinux2_28-builder:cuda13*)
-        TARGET=cuda_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
-        MANY_LINUX_VERSION="2_28"
-        ;;
-    manylinuxaarch64-builder:cuda*)
-        TARGET=cuda_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG="--build-arg BASE_CUDA_VERSION=${GPU_ARCH_VERSION} --build-arg DEVTOOLSET_VERSION=13"
-        MANY_LINUX_VERSION="aarch64"
-        DOCKERFILE_SUFFIX="_cuda_aarch64"
-        ;;
-    manylinux2_28-builder:rocm*)
-        # we want the patch version of 6.4 instead
-        if [[ $(ver $GPU_ARCH_VERSION) -eq $(ver 6.4) ]]; then
-            GPU_ARCH_VERSION="${GPU_ARCH_VERSION}.2"
-        fi
-        TARGET=rocm_final
-        MANY_LINUX_VERSION="2_28"
-        DEVTOOLSET_VERSION="11"
-        GPU_IMAGE=rocm/dev-almalinux-8:${GPU_ARCH_VERSION}-complete
-        PYTORCH_ROCM_ARCH="gfx900;gfx906;gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200;gfx1201"
-        DOCKER_GPU_BUILD_ARG="--build-arg ROCM_VERSION=${GPU_ARCH_VERSION} --build-arg PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} --build-arg DEVTOOLSET_VERSION=${DEVTOOLSET_VERSION}"
-        ;;
-    manylinux2_28-builder:xpu)
-        TARGET=xpu_final
-        GPU_IMAGE=amd64/almalinux:8
-        DOCKER_GPU_BUILD_ARG=" --build-arg DEVTOOLSET_VERSION=11"
-        MANY_LINUX_VERSION="2_28"
-        ;;
-    *)
-        echo "ERROR: Unrecognized image name: ${image}"
-        exit 1
-        ;;
-esac
-
-if [[ -n ${MANY_LINUX_VERSION} && -z ${DOCKERFILE_SUFFIX} ]]; then
-    DOCKERFILE_SUFFIX=_${MANY_LINUX_VERSION}
-fi
-# Only activate this if in CI
-if [ "$(uname -m)" != "s390x" ] && [ -v CI ]; then
-    # TODO: Remove LimitNOFILE=1048576 patch once https://github.com/pytorch/test-infra/issues/5712
-    # is resolved. This patch is required in order to fix timing out of Docker build on Amazon Linux 2023.
-    sudo sed -i s/LimitNOFILE=infinity/LimitNOFILE=1048576/ /usr/lib/systemd/system/docker.service
-    sudo systemctl daemon-reload
-    sudo systemctl restart docker
-fi
-
-tmp_tag=$(basename "$(mktemp -u)" | tr '[:upper:]' '[:lower:]')
-
-DOCKER_BUILDKIT=1 docker build  \
-    ${DOCKER_GPU_BUILD_ARG} \
-    --build-arg "GPU_IMAGE=${GPU_IMAGE}" \
-    --build-arg "OPENBLAS_VERSION=${OPENBLAS_VERSION}" \
-    --target "${TARGET}" \
-    -t "${tmp_tag}" \
-    $@ \
-    -f "${TOPDIR}/.ci/docker/manywheel/Dockerfile${DOCKERFILE_SUFFIX}" \
-    "${TOPDIR}/.ci/docker/"
--- a/.ci/docker/manywheel/build_scripts/build.sh
+++ b/.ci/docker/manywheel/build_scripts/build.sh
@ -1,118 +0,0 @@
-#!/bin/bash
-# Top-level build script called from Dockerfile
-# Script used only in CD pipeline
-
-# Stop at any error, show all commands
-set -ex
-
-# openssl version to build, with expected sha256 hash of .tar.gz
-# archive
-OPENSSL_ROOT=openssl-1.1.1l
-OPENSSL_HASH=0b7a3e5e59c34827fe0c3a74b7ec8baef302b98fa80088d7f9153aa16fa76bd1
-DEVTOOLS_HASH=a8ebeb4bed624700f727179e6ef771dafe47651131a00a78b342251415646acc
-PATCHELF_HASH=d9afdff4baeacfbc64861454f368b7f2c15c44d245293f7587bbf726bfe722fb
-CURL_ROOT=curl-7.73.0
-CURL_HASH=cf34fe0b07b800f1c01a499a6e8b2af548f6d0e044dca4a29d88a4bee146d131
-AUTOCONF_ROOT=autoconf-2.69
-AUTOCONF_HASH=954bd69b391edc12d6a4a51a2dd1476543da5c6bbf05a95b59dc0dd6fd4c2969
-
-# Dependencies for compiling Python that we want to remove from
-# the final image after compiling Python
-PYTHON_COMPILE_DEPS="zlib-devel bzip2-devel ncurses-devel sqlite-devel readline-devel tk-devel gdbm-devel libpcap-devel xz-devel libffi-devel"
-
-if [ "$(uname -m)" != "s390x" ] ; then
-    PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} db4-devel"
-else
-    PYTHON_COMPILE_DEPS="${PYTHON_COMPILE_DEPS} libdb-devel"
-fi
-
-# Libraries that are allowed as part of the manylinux1 profile
-MANYLINUX1_DEPS="glibc-devel libstdc++-devel glib2-devel libX11-devel libXext-devel libXrender-devel  mesa-libGL-devel libICE-devel libSM-devel ncurses-devel"
-
-# Get build utilities
-MY_DIR=$(dirname "${BASH_SOURCE[0]}")
-source $MY_DIR/build_utils.sh
-
-# Development tools and libraries
-yum -y install bzip2 make git patch unzip bison yasm diffutils \
-    automake which file \
-    ${PYTHON_COMPILE_DEPS}
-
-# Install newest autoconf
-build_autoconf $AUTOCONF_ROOT $AUTOCONF_HASH
-autoconf --version
-
-# Compile the latest Python releases.
-# (In order to have a proper SSL module, Python is compiled
-# against a recent openssl [see env vars above], which is linked
-# statically. We delete openssl afterwards.)
-build_openssl $OPENSSL_ROOT $OPENSSL_HASH
-/build_scripts/install_cpython.sh
-
-PY39_BIN=/opt/python/cp39-cp39/bin
-
-# Our openssl doesn't know how to find the system CA trust store
-#   (https://github.com/pypa/manylinux/issues/53)
-# And it's not clear how up-to-date that is anyway
-# So let's just use the same one pip and everyone uses
-$PY39_BIN/pip install certifi
-ln -s $($PY39_BIN/python -c 'import certifi; print(certifi.where())') \
-      /opt/_internal/certs.pem
-# If you modify this line you also have to modify the versions in the
-# Dockerfiles:
-export SSL_CERT_FILE=/opt/_internal/certs.pem
-
-# Install newest curl
-build_curl $CURL_ROOT $CURL_HASH
-rm -rf /usr/local/include/curl /usr/local/lib/libcurl* /usr/local/lib/pkgconfig/libcurl.pc
-hash -r
-curl --version
-curl-config --features
-
-# Install patchelf (latest with unreleased bug fixes)
-curl -sLOk https://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.gz
-# check_sha256sum patchelf-0.9njs2.tar.gz $PATCHELF_HASH
-tar -xzf patchelf-0.10.tar.gz
-(cd patchelf-0.10 && ./configure && make && make install)
-rm -rf patchelf-0.10.tar.gz patchelf-0.10
-
-# Install latest pypi release of auditwheel
-$PY39_BIN/pip install auditwheel
-ln -s $PY39_BIN/auditwheel /usr/local/bin/auditwheel
-
-# Clean up development headers and other unnecessary stuff for
-# final image
-yum -y erase wireless-tools gtk2 libX11 hicolor-icon-theme \
-    avahi freetype bitstream-vera-fonts \
-    ${PYTHON_COMPILE_DEPS} || true > /dev/null 2>&1
-yum -y install ${MANYLINUX1_DEPS}
-yum -y clean all > /dev/null 2>&1
-yum list installed
-
-# we don't need libpython*.a, and they're many megabytes
-find /opt/_internal -name '*.a' -print0 | xargs -0 rm -f
-# Strip what we can -- and ignore errors, because this just attempts to strip
-# *everything*, including non-ELF files:
-find /opt/_internal -type f -print0 \
-    | xargs -0 -n1 strip --strip-unneeded 2>/dev/null || true
-# We do not need the Python test suites, or indeed the precompiled .pyc and
-# .pyo files. Partially cribbed from:
-#    https://github.com/docker-library/python/blob/master/3.4/slim/Dockerfile  # @lint-ignore
-find /opt/_internal \
-     \( -type d -a -name test -o -name tests \) \
-  -o \( -type f -a -name '*.pyc' -o -name '*.pyo' \) \
-  -print0 | xargs -0 rm -f
-
-for PYTHON in /opt/python/*/bin/python; do
-    # Smoke test to make sure that our Pythons work, and do indeed detect as
-    # being manylinux compatible:
-    $PYTHON $MY_DIR/manylinux1-check.py
-    # Make sure that SSL cert checking works
-    $PYTHON $MY_DIR/ssl-check.py
-done
-
-# Fix libc headers to remain compatible with C99 compilers.
-find /usr/include/ -type f -exec sed -i 's/\bextern _*inline_*\b/extern __inline __attribute__ ((__gnu_inline__))/g' {} +
-
-# Now we can delete our built SSL
-rm -rf /usr/local/ssl
--- a/.ci/docker/manywheel/build_scripts/build_utils.sh
+++ b/.ci/docker/manywheel/build_scripts/build_utils.sh
@ -1,91 +0,0 @@
-#!/bin/bash
-# Helper utilities for build
-# Script used only in CD pipeline
-
-OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source/old/1.1.1/  # @lint-ignore
-CURL_DOWNLOAD_URL=https://curl.se/download
-
-AUTOCONF_DOWNLOAD_URL=https://ftp.gnu.org/gnu/autoconf
-
-
-function check_var {
-    if [ -z "$1" ]; then
-        echo "required variable not defined"
-        exit 1
-    fi
-}
-
-
-function do_openssl_build {
-    ./config no-ssl2 no-shared -fPIC --prefix=/usr/local/ssl > /dev/null
-    make > /dev/null
-    make install > /dev/null
-}
-
-
-function check_sha256sum {
-    local fname=$1
-    check_var ${fname}
-    local sha256=$2
-    check_var ${sha256}
-
-    echo "${sha256}  ${fname}" > ${fname}.sha256
-    sha256sum -c ${fname}.sha256
-    rm -f ${fname}.sha256
-}
-
-
-function build_openssl {
-    local openssl_fname=$1
-    check_var ${openssl_fname}
-    local openssl_sha256=$2
-    check_var ${openssl_sha256}
-    check_var ${OPENSSL_DOWNLOAD_URL}
-    curl -sLO ${OPENSSL_DOWNLOAD_URL}/${openssl_fname}.tar.gz
-    check_sha256sum ${openssl_fname}.tar.gz ${openssl_sha256}
-    tar -xzf ${openssl_fname}.tar.gz
-    (cd ${openssl_fname} && do_openssl_build)
-    rm -rf ${openssl_fname} ${openssl_fname}.tar.gz
-}
-
-
-function do_curl_build {
-    LIBS=-ldl ./configure --with-ssl --disable-shared > /dev/null
-    make > /dev/null
-    make install > /dev/null
-}
-
-
-function build_curl {
-    local curl_fname=$1
-    check_var ${curl_fname}
-    local curl_sha256=$2
-    check_var ${curl_sha256}
-    check_var ${CURL_DOWNLOAD_URL}
-    curl -sLO ${CURL_DOWNLOAD_URL}/${curl_fname}.tar.bz2
-    check_sha256sum ${curl_fname}.tar.bz2 ${curl_sha256}
-    tar -jxf ${curl_fname}.tar.bz2
-    (cd ${curl_fname} && do_curl_build)
-    rm -rf ${curl_fname} ${curl_fname}.tar.bz2
-}
-
-
-function do_standard_install {
-    ./configure > /dev/null
-    make > /dev/null
-    make install > /dev/null
-}
-
-
-function build_autoconf {
-    local autoconf_fname=$1
-    check_var ${autoconf_fname}
-    local autoconf_sha256=$2
-    check_var ${autoconf_sha256}
-    check_var ${AUTOCONF_DOWNLOAD_URL}
-    curl -sLO ${AUTOCONF_DOWNLOAD_URL}/${autoconf_fname}.tar.gz
-    check_sha256sum ${autoconf_fname}.tar.gz ${autoconf_sha256}
-    tar -zxf ${autoconf_fname}.tar.gz
-    (cd ${autoconf_fname} && do_standard_install)
-    rm -rf ${autoconf_fname} ${autoconf_fname}.tar.gz
-}
--- a/.ci/docker/manywheel/build_scripts/manylinux1-check.py
+++ b/.ci/docker/manywheel/build_scripts/manylinux1-check.py
@ -1,60 +0,0 @@
-# Logic copied from PEP 513
-
-
-def is_manylinux1_compatible():
-    # Only Linux, and only x86-64 / i686
-    from distutils.util import get_platform
-
-    if get_platform() not in ["linux-x86_64", "linux-i686", "linux-s390x"]:
-        return False
-
-    # Check for presence of _manylinux module
-    try:
-        import _manylinux
-
-        return bool(_manylinux.manylinux1_compatible)
-    except (ImportError, AttributeError):
-        # Fall through to heuristic check below
-        pass
-
-    # Check glibc version. CentOS 5 uses glibc 2.5.
-    return have_compatible_glibc(2, 5)
-
-
-def have_compatible_glibc(major, minimum_minor):
-    import ctypes
-
-    process_namespace = ctypes.CDLL(None)
-    try:
-        gnu_get_libc_version = process_namespace.gnu_get_libc_version
-    except AttributeError:
-        # Symbol doesn't exist -> therefore, we are not linked to
-        # glibc.
-        return False
-
-    # Call gnu_get_libc_version, which returns a string like "2.5".
-    gnu_get_libc_version.restype = ctypes.c_char_p
-    version_str = gnu_get_libc_version()
-    # py2 / py3 compatibility:
-    if not isinstance(version_str, str):
-        version_str = version_str.decode("ascii")
-
-    # Parse string and check against requested version.
-    version = [int(piece) for piece in version_str.split(".")]
-    assert len(version) == 2
-    if major != version[0]:
-        return False
-    if minimum_minor > version[1]:
-        return False
-    return True
-
-
-import sys
-
-
-if is_manylinux1_compatible():
-    print(f"{sys.executable} is manylinux1 compatible")
-    sys.exit(0)
-else:
-    print(f"{sys.executable} is NOT manylinux1 compatible")
-    sys.exit(1)
--- a/.ci/docker/manywheel/build_scripts/ssl-check.py
+++ b/.ci/docker/manywheel/build_scripts/ssl-check.py
@ -1,31 +0,0 @@
-# cf. https://github.com/pypa/manylinux/issues/53
-
-import sys
-from urllib.request import urlopen
-
-
-GOOD_SSL = "https://google.com"
-BAD_SSL = "https://self-signed.badssl.com"
-
-
-print("Testing SSL certificate checking for Python:", sys.version)
-
-if sys.version_info[:2] < (2, 7) or sys.version_info[:2] < (3, 4):
-    print("This version never checks SSL certs; skipping tests")
-    sys.exit(0)
-
-
-EXC = OSError
-
-print(f"Connecting to {GOOD_SSL} should work")
-urlopen(GOOD_SSL)
-print("...it did, yay.")
-
-print(f"Connecting to {BAD_SSL} should fail")
-try:
-    urlopen(BAD_SSL)
-    # If we get here then we failed:
-    print("...it DIDN'T!!!!!11!!1one!")
-    sys.exit(1)
-except EXC:
-    print("...it did, yay.")
--- a/.ci/docker/requirements-ci.txt
+++ b/.ci/docker/requirements-ci.txt
@ -1,390 +0,0 @@
-# Python dependencies required for unit tests
-
-#awscli==1.6 #this breaks some platforms
-#Description: AWS command line interface
-#Pinned versions: 1.6
-#test that import:
-
-boto3==1.35.42
-#Description: AWS SDK for python
-#Pinned versions: 1.35.42
-#test that import:
-
-click
-#Description: Command Line Interface Creation Kit
-#Pinned versions:
-#test that import:
-
-coremltools==5.0b5 ; python_version < "3.12"
-coremltools==8.3 ; python_version == "3.12"
-#Description: Apple framework for ML integration
-#Pinned versions: 5.0b5
-#test that import:
-
-#dataclasses #this breaks some platforms
-#Description: Provides decorators for auto adding special methods to user classes
-#Pinned versions:
-#test that import:
-
-dill==0.3.7
-#Description: dill extends pickle with serializing and de-serializing for most built-ins
-#Pinned versions: 0.3.7
-#test that import: dynamo/test_replay_record.py test_dataloader.py test_datapipe.py test_serialization.py
-
-expecttest==0.3.0
-#Description: method for writing tests where test framework auto populates
-# the expected output based on previous runs
-#Pinned versions: 0.3.0
-#test that import:
-
-fbscribelogger==0.1.7
-#Description: write to scribe from authenticated jobs on CI
-#Pinned versions: 0.1.7
-#test that import:
-
-flatbuffers==24.12.23
-#Description: cross platform serialization library
-#Pinned versions: 24.12.23
-#test that import:
-
-hypothesis==5.35.1
-# Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136
-#Description: advanced library for generating parametrized tests
-#Pinned versions: 5.35.1
-#test that import: test_xnnpack_integration.py, test_pruning_op.py, test_nn.py
-
-junitparser==2.1.1
-#Description: junitparser handles JUnit/xUnit Result XML files
-#Pinned versions: 2.1.1
-#test that import:
-
-lark==0.12.0
-#Description: parser
-#Pinned versions: 0.12.0
-#test that import:
-
-librosa>=0.6.2 ; python_version < "3.11" and platform_machine != "s390x"
-librosa==0.10.2 ; python_version == "3.12" and platform_machine != "s390x"
-#Description: A python package for music and audio analysis
-#Pinned versions: >=0.6.2
-#test that import: test_spectral_ops.py
-#librosa depends on numba; disable it for s390x while numba is disabled too
-
-#mkl #this breaks linux-bionic-rocm4.5-py3.7
-#Description: Intel oneAPI Math Kernel Library
-#Pinned versions:
-#test that import: test_profiler.py, test_public_bindings.py, test_testing.py,
-#test_nn.py, test_mkldnn.py, test_jit.py, test_fx_experimental.py,
-#test_autograd.py
-
-#mkl-devel
-# see mkl
-
-#mock
-#Description: A testing library that allows you to replace parts of your
-#system under test with mock objects
-#Pinned versions:
-#test that import: test_modules.py, test_nn.py,
-#test_testing.py
-
-#MonkeyType # breaks pytorch-xla-linux-bionic-py3.7-clang8
-#Description: collects runtime types of function arguments and return
-#values, and can automatically generate stub files
-#Pinned versions:
-#test that import:
-
-mypy==1.16.0 ; platform_system != "Windows"
-# Pin MyPy version because new errors are likely to appear with each release
-# Skip on Windows as lots of type annotations are POSIX specific
-#Description: linter
-#Pinned versions: 1.16.0
-#test that import: test_typing.py, test_type_hints.py
-
-networkx==2.8.8
-#Description: creation, manipulation, and study of
-#the structure, dynamics, and functions of complex networks
-#Pinned versions: 2.8.8
-#test that import: functorch
-
-ninja==1.11.1.3
-#Description: build system. Used in some tests. Used in build to generate build
-#time tracing information
-#Pinned versions: 1.11.1.3
-#test that import: run_test.py, test_cpp_extensions_aot.py,test_determination.py
-
-numba==0.55.2 ; python_version == "3.10" and platform_machine != "s390x"
-numba==0.60.0 ; python_version == "3.12" and platform_machine != "s390x"
-#Description: Just-In-Time Compiler for Numerical Functions
-#Pinned versions: 0.55.2 (Python 3.10), 0.60.0 (Python 3.12)
-#test that import: test_numba_integration.py
-#For numba issue see https://github.com/pytorch/pytorch/issues/51511
-#Need release > 0.61.2 for s390x due to https://github.com/numba/numba/pull/10073
-
-#numpy
-#Description: Provides N-dimensional arrays and linear algebra
-#Pinned versions: 1.26.2
-#test that import: test_view_ops.py, test_unary_ufuncs.py, test_type_promotion.py,
-#test_type_info.py, test_torch.py, test_tensorexpr_pybind.py, test_tensorexpr.py,
-#test_tensorboard.py, test_tensor_creation_ops.py, test_static_runtime.py,
-#test_spectral_ops.py, test_sort_and_select.py, test_shape_ops.py,
-#test_segment_reductions.py, test_reductions.py, test_pruning_op.py,
-#test_overrides.py, test_numpy_interop.py, test_numba_integration.py
-#test_nn.py, test_namedtensor.py, test_linalg.py, test_jit_cuda_fuser.py,
-#test_jit.py, test_indexing.py, test_datapipe.py, test_dataloader.py,
-#test_binary_ufuncs.py
-numpy==1.22.4; python_version == "3.10"
-numpy==1.26.2; python_version == "3.11" or python_version == "3.12"
-numpy==2.1.2; python_version >= "3.13"
-
-pandas==2.0.3; python_version < "3.13"
-pandas==2.2.3; python_version >= "3.13"
-
-#onnxruntime
-#Description: scoring engine for Open Neural Network Exchange (ONNX) models
-#Pinned versions: 1.9.0
-#test that import:
-
-opt-einsum==3.3
-#Description: Python library to optimize tensor contraction order, used in einsum
-#Pinned versions: 3.3
-#test that import: test_linalg.py
-
-optree==0.13.0
-#Description: A library for tree manipulation
-#Pinned versions: 0.13.0
-#test that import: test_vmap.py, test_aotdispatch.py, test_dynamic_shapes.py,
-#test_pytree.py, test_ops.py, test_control_flow.py, test_modules.py,
-#common_utils.py, test_eager_transforms.py, test_python_dispatch.py,
-#test_expanded_weights.py, test_decomp.py, test_overrides.py, test_masked.py,
-#test_ops.py, test_prims.py, test_subclass.py, test_functionalization.py,
-#test_schema_check.py, test_profiler_tree.py, test_meta.py, test_torchxla_num_output.py,
-#test_utils.py, test_proxy_tensor.py, test_memory_profiler.py, test_view_ops.py,
-#test_pointwise_ops.py, test_dtensor_ops.py, test_torchinductor.py, test_fx.py,
-#test_fake_tensor.py, test_mps.py
-
-pillow==11.0.0
-#Description:  Python Imaging Library fork
-#Pinned versions: 11.0.0
-#test that import:
-
-protobuf==5.29.4
-#Description:  Google's data interchange format
-#Pinned versions: 5.29.4
-#test that import: test_tensorboard.py, test/onnx/*
-
-psutil
-#Description: information on running processes and system utilization
-#Pinned versions:
-#test that import: test_profiler.py, test_openmp.py, test_dataloader.py
-
-pytest==7.3.2
-#Description: testing framework
-#Pinned versions:
-#test that import: test_typing.py, test_cpp_extensions_aot.py, run_test.py
-
-pytest-xdist==3.3.1
-#Description: plugin for running pytest in parallel
-#Pinned versions:
-#test that import:
-
-pytest-flakefinder==1.1.0
-#Description: plugin for rerunning tests a fixed number of times in pytest
-#Pinned versions: 1.1.0
-#test that import:
-
-pytest-rerunfailures>=10.3
-#Description: plugin for rerunning failure tests in pytest
-#Pinned versions:
-#test that import:
-
-pytest-subtests==0.13.1
-#Description: plugin for subtest support
-#Pinned versions:
-#test that import:
-
-#pytest-benchmark
-#Description: fixture for benchmarking code
-#Pinned versions: 3.2.3
-#test that import:
-
-#pytest-sugar
-#Description: shows failures and errors instantly
-#Pinned versions:
-#test that import:
-
-xdoctest==1.1.0
-#Description: runs doctests in pytest
-#Pinned versions: 1.1.0
-#test that import:
-
-pygments==2.15.0
-#Description: support doctest highlighting
-#Pinned versions: 2.15.0
-#test that import: the doctests
-
-#pyyaml
-#Description: data serialization format
-#Pinned versions: 6.0.2
-#test that import:
-
-#requests
-#Description: HTTP library
-#Pinned versions:
-#test that import: test_type_promotion.py
-
-#rich
-#Description: rich text and beautiful formatting in the terminal
-#Pinned versions: 14.1.0
-#test that import:
-
-scikit-image==0.19.3 ; python_version < "3.10"
-scikit-image==0.22.0 ; python_version >= "3.10"
-#Description: image processing routines
-#Pinned versions:
-#test that import: test_nn.py
-
-#scikit-learn
-#Description: machine learning package
-#Pinned versions: 0.20.3
-#test that import:
-
-scipy==1.10.1 ; python_version <= "3.11"
-scipy==1.14.1 ; python_version >= "3.12"
-# Pin SciPy because of failing distribution tests (see #60347)
-#Description: scientific python
-#Pinned versions: 1.10.1
-#test that import: test_unary_ufuncs.py, test_torch.py,test_tensor_creation_ops.py
-#test_spectral_ops.py, test_sparse_csr.py, test_reductions.py,test_nn.py
-#test_linalg.py, test_binary_ufuncs.py
-
-#tabulate
-#Description: Pretty-print tabular data
-#Pinned versions:
-#test that import:
-
-# needed by torchgen utils
-typing-extensions>=4.10.0
-#Description: type hints for python
-#Pinned versions:
-#test that import:
-
-#virtualenv
-#Description: virtual environment for python
-#Pinned versions:
-#test that import:
-
-unittest-xml-reporting<=3.2.0,>=2.0.0
-#Description: saves unit test results to xml
-#Pinned versions:
-#test that import:
-
-#lintrunner is supported on aarch64-linux only from 0.12.4 version
-lintrunner==0.12.7
-#Description: all about linters!
-#Pinned versions: 0.12.7
-#test that import:
-
-redis>=4.0.0
-#Description: redis database
-#test that import: anything that tests OSS caching/mocking (inductor/test_codecache.py, inductor/test_max_autotune.py)
-
-ghstack==0.8.0
-#Description: ghstack tool
-#Pinned versions: 0.8.0
-#test that import:
-
-jinja2==3.1.6
-#Description: jinja2 template engine
-#Pinned versions: 3.1.6
-#test that import:
-
-pytest-cpp==2.3.0
-#Description: This is used by pytest to invoke C++ tests
-#Pinned versions: 2.3.0
-#test that import:
-
-z3-solver==4.15.1.0 ; platform_machine != "s390x"
-#Description: The Z3 Theorem Prover Project
-#Pinned versions:
-#test that import:
-
-tensorboard==2.13.0 ; python_version < "3.13"
-tensorboard==2.18.0 ; python_version >= "3.13"
-#Description: Also included in .ci/docker/requirements-docs.txt
-#Pinned versions:
-#test that import: test_tensorboard
-
-pywavelets==1.4.1 ; python_version < "3.12"
-pywavelets==1.7.0 ; python_version >= "3.12"
-#Description: This is a requirement of scikit-image, we need to pin
-# it here because 1.5.0 conflicts with numpy 1.21.2 used in CI
-#Pinned versions: 1.4.1
-#test that import:
-
-lxml==5.3.0
-#Description: This is a requirement of unittest-xml-reporting
-
-PyGithub==2.3.0
-
-sympy==1.13.3
-#Description: Required by coremltools, also pinned in .github/requirements/pip-requirements-macOS.txt
-#Pinned versions:
-#test that import:
-
-onnx==1.18.0
-#Description: Required by onnx tests, and mypy and test_public_bindings.py when checking torch.onnx._internal
-#Pinned versions:
-#test that import:
-
-onnxscript==0.4.0
-#Description: Required by mypy and test_public_bindings.py when checking torch.onnx._internal
-#Pinned versions:
-#test that import:
-
-parameterized==0.8.1
-#Description: Parameterizes unittests, both the tests themselves and the entire testing class
-#Pinned versions:
-#test that import:
-
-#Description: required for testing torch/distributed/_tools/sac_estimator.py
-#Pinned versions: 1.24.0
-#test that import: test_sac_estimator.py
-
-pwlf==2.2.1
-#Description: required for testing torch/distributed/_tools/sac_estimator.py
-#Pinned versions: 2.2.1
-#test that import: test_sac_estimator.py
-
-# To build PyTorch itself
-pyyaml
-pyzstd
-setuptools>=70.1.0
-six
-
-scons==4.5.2 ; platform_machine == "aarch64"
-
-pulp==2.9.0
-#Description: required for testing ILP formulation under torch/distributed/_tools
-#Pinned versions: 2.9.0
-#test that import: test_sac_ilp.py
-
-dataclasses_json==0.6.7
-#Description: required for data pipeline and scripts under tools/stats
-#Pinned versions: 0.6.7
-#test that import:
-
-cmake==4.0.0
-#Description: required for building
-
-tlparse==0.4.0
-#Description: required for log parsing
-
-cuda-bindings>=12.0,<13.0 ; platform_machine != "s390x"
-#Description: required for testing CUDAGraph::raw_cuda_graph(). See https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html for how this version was chosen. Note "Any fix in the latest bindings would be backported to the prior major version" means that only the newest version of cuda-bindings will get fixes. Depending on the latest version of 12.x is okay because all 12.y versions will be supported via "CUDA minor version compatibility". Pytorch builds against 13.z versions of cuda toolkit work with 12.x versions of cuda-bindings as well because newer drivers work with old toolkits.
-#test that import: test_cuda.py
-
-setuptools-git-versioning==2.1.0
-scikit-build==0.18.1
-pyre-extensions==0.0.32
-tabulate==0.9.0
-#Description: These packages are needed to build FBGEMM and torchrec on PyTorch CI
--- a/.ci/docker/requirements-docs.txt
+++ b/.ci/docker/requirements-docs.txt
@ -1,61 +0,0 @@
-sphinx==5.3.0
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 5.3.0
-e git+https://github.com/pytorch/pytorch_sphinx_theme.git@d53b0ffb9b1cda68260693ea98f3483823c88d8e#egg=pytorch_sphinx_theme2
-
-# TODO: sphinxcontrib.katex 0.9.0 adds a local KaTeX server to speed up pre-rendering,
-# but it doesn't seem to work and hangs around idly. The initial thought is that this is
-# probably related to the Docker setup. We can investigate this later.
-
-sphinxcontrib.katex==0.8.6
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 0.8.6
-
-sphinxext-opengraph==0.9.1
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 0.9.1
-
-sphinx_sitemap==2.6.0
-#Description: This is used to generate sitemap for PyTorch docs
-#Pinned versions: 2.6.0
-
-matplotlib==3.5.3 ; python_version < "3.13"
-matplotlib==3.6.3 ; python_version >= "3.13"
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 3.6.3 if python > 3.12. Otherwise 3.5.3.
-
-tensorboard==2.13.0 ; python_version < "3.13"
-tensorboard==2.18.0 ; python_version >= "3.13"
-#Description: This is used to generate PyTorch docs
-#Pinned versions: 2.13.0
-
-breathe==4.34.0
-#Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 4.34.0
-
-exhale==0.2.3
-#Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 0.2.3
-
-docutils==0.16
-#Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 0.16
-
-bs4==0.0.1
-#Description: This is used to generate PyTorch C++ docs
-#Pinned versions: 0.0.1
-
-IPython==8.12.0
-#Description: This is used to generate PyTorch functorch docs
-#Pinned versions: 8.12.0
-
-myst-nb==0.17.2
-#Description: This is used to generate PyTorch functorch and torch.compile docs.
-#Pinned versions: 0.17.2
-
-# The following are required to build torch.distributed.elastic.rendezvous.etcd* docs
-python-etcd==0.4.5
-sphinx-copybutton==0.5.0
-sphinx-design==0.4.0
-sphinxcontrib-mermaid==1.0.0
-myst-parser==0.18.1
--- a/.ci/docker/triton_version.txt
+++ b/.ci/docker/triton_version.txt
@ -1 +0,0 @@
-3.5.0
--- a/.ci/docker/triton_xpu_version.txt
+++ b/.ci/docker/triton_xpu_version.txt
@ -1 +0,0 @@
-3.5.0
--- a/.ci/docker/ubuntu-cross-riscv/Dockerfile
+++ b/.ci/docker/ubuntu-cross-riscv/Dockerfile
@ -1,155 +0,0 @@
-# Cross-compilation Docker container for RISC-V architecture
-ARG UBUNTU_VERSION
-FROM --platform=linux/amd64 ubuntu:${UBUNTU_VERSION} as base
-
-ARG UBUNTU_VERSION
-
-ENV GCC_VERSION=14
-ENV PYTHON_VERSION=3.12.3
-ENV DEBIAN_FRONTEND=noninteractive
-ENV CC=riscv64-linux-gnu-gcc-${GCC_VERSION}
-ENV CXX=riscv64-linux-gnu-g++-${GCC_VERSION}
-ENV QEMU_LD_PREFIX=/usr/riscv64-linux-gnu/
-ENV SYSROOT=/opt/sysroot
-
-# Install basic dependencies
-RUN apt-get update && apt-get install -y \
-    ninja-build \
-    autoconf \
-    automake \
-    libtool \
-    patchelf \
-    ccache \
-    git \
-    wget \
-    python3-pip \
-    python3-venv \
-    python-is-python3 \
-    cmake \
-    sudo \
-    lsb-release \
-    gcc-${GCC_VERSION}-riscv64-linux-gnu \
-    g++-${GCC_VERSION}-riscv64-linux-gnu \
-    pkg-config \
-    && rm -rf /var/lib/apt/lists/*
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-FROM base as python
-ARG ZLIB_VERSION=1.3.1
-ARG FFI_VERSION=3.4.6
-ARG BZ2_VERSION=1.0.8
-ARG XZ_VERSION=5.4.6
-ARG OPENSSL_VERSION=3.2.1
-
-# Set up sysroot directory for dependencies
-ENV PKG_CONFIG_PATH=${SYSROOT}/lib/pkgconfig
-ENV PKG_CONFIG_SYSROOT_DIR=${SYSROOT}
-
-WORKDIR /opt
-
-# Build zlib (for compression)
-RUN echo "--- Building zlib ---" \
-    && wget -c https://www.zlib.net/zlib-${ZLIB_VERSION}.tar.gz \
-    && tar -xf zlib-${ZLIB_VERSION}.tar.gz --no-same-permissions --no-same-owner \
-    && cd zlib-${ZLIB_VERSION}/ \
-    && mkdir build && cd build \
-    && ../configure --prefix=${SYSROOT} \
-    && make -j$(nproc) && make install \
-    && cd ../..
-
-# Build libffi (for ctypes module)
-RUN echo "--- Building libffi ---" \
-    && wget -c https://github.com/libffi/libffi/releases/download/v${FFI_VERSION}/libffi-${FFI_VERSION}.tar.gz \
-    && tar -xf libffi-${FFI_VERSION}.tar.gz --no-same-permissions --no-same-owner \
-    && cd libffi-${FFI_VERSION}/ \
-    && mkdir build && cd build \
-    && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
-    && make -j$(nproc) && make install \
-    && cd ../..
-
-# Build bzip2 (for bz2 module)
-RUN echo "--- Building bzip2 ---" \
-    && wget -c https://sourceware.org/pub/bzip2/bzip2-${BZ2_VERSION}.tar.gz \
-    && tar -xf bzip2-${BZ2_VERSION}.tar.gz --no-same-permissions --no-same-owner \
-    && cd bzip2-${BZ2_VERSION}/ \
-    && make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} bzip2 bzip2recover libbz2.a \
-    && make CC=riscv64-linux-gnu-gcc-${GCC_VERSION} -f Makefile-libbz2_so \
-    && make install PREFIX=${SYSROOT} \
-    && cp libbz2.so.${BZ2_VERSION} ${SYSROOT}/lib/ \
-    && cd ${SYSROOT}/lib/ \
-    && ln -sf libbz2.so.${BZ2_VERSION} libbz2.so.1.0 \
-    && ln -sf libbz2.so.1.0 libbz2.so \
-    && cd /opt/
-
-# Build xz (for lzma module)
-RUN echo "--- Building xz ---" \
-    && wget -c https://github.com/tukaani-project/xz/releases/download/v${XZ_VERSION}/xz-${XZ_VERSION}.tar.gz \
-    && tar -xf xz-${XZ_VERSION}.tar.gz --no-same-permissions --no-same-owner \
-    && cd xz-${XZ_VERSION} \
-    && mkdir build && cd build \
-    && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
-    && make -j$(nproc) && make install \
-    && cd ../..
-
-# Build OpenSSL (for ssl module)
-RUN echo "--- Building OpenSSL ---" \
-    && wget -c https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz \
-    && tar -xf openssl-${OPENSSL_VERSION}.tar.gz --no-same-permissions --no-same-owner \
-    && cd openssl-${OPENSSL_VERSION}/ \
-    && mkdir build && cd build \
-    && ../Configure linux64-riscv64 --prefix=${SYSROOT} \
-    && make -j$(nproc) && make install_sw \
-    && cd ../..
-
-# Build SQLite3 (for sqlite3 module)
-RUN echo "--- Building SQLite3 ---" \
-    && wget -c https://www.sqlite.org/2024/sqlite-autoconf-3450200.tar.gz \
-    && tar -xf sqlite-autoconf-3450200.tar.gz --no-same-permissions --no-same-owner \
-    && cd sqlite-autoconf-3450200 \
-    && mkdir build && cd build \
-    && ../configure --prefix=${SYSROOT} --host=riscv64-linux-gnu --build=x86_64-linux-gnu \
-    && make -j$(nproc) && make install \
-    && cd ../..
-
-# Build and install RISC-V Python with all modules
-RUN wget -c https://www.python.org/ftp/python/${PYTHON_VERSION}/Python-${PYTHON_VERSION}.tgz \
-    && tar -xf Python-${PYTHON_VERSION}.tgz --no-same-permissions --no-same-owner \
-    && cd Python-${PYTHON_VERSION} \
-    && mkdir build && cd build \
-    && ../configure \
-        --host=riscv64-linux-gnu \
-        --build=x86_64-linux-gnu \
-        --prefix=${SYSROOT} \
-        --enable-shared \
-        --disable-ipv6 \
-        --with-build-python=/usr/bin/python3 \
-        --with-ensurepip=no \
-        ac_cv_file__dev_ptmx=yes \
-        ac_cv_file__dev_ptc=no \
-    && make -j$(nproc) \
-    && make install
-
-FROM base as final
-COPY --from=python             /opt/sysroot                       /opt/sysroot
-
-# Install crossenv and cmake
-RUN pip install crossenv cmake==4.0.0 --break-system-packages \
-    && /usr/bin/python3 -m crossenv ${SYSROOT}/bin/python3 /opt/riscv-cross-env
-
-# Add pip-installed cmake binaries to PATH
-ENV PATH="/usr/local/bin:${PATH}"
-
-# Set up cross Python environment
-SHELL ["/bin/bash", "-c"]
-RUN source /opt/riscv-cross-env/bin/activate \
-    && pip install setuptools pyyaml typing_extensions wheel
-
-# Set default environment variables for PyTorch build
-ENV Python_ROOT_DIR=${SYSROOT}
-ENV OPENSSL_ROOT_DIR=${SYSROOT}
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/ubuntu-rocm/Dockerfile
+++ b/.ci/docker/ubuntu-rocm/Dockerfile
@ -1,144 +0,0 @@
-ARG UBUNTU_VERSION
-
-FROM ubuntu:${UBUNTU_VERSION}
-
-ARG UBUNTU_VERSION
-
-ENV DEBIAN_FRONTEND noninteractive
-
-# Set AMD gpu targets to build for
-ARG PYTORCH_ROCM_ARCH
-ENV PYTORCH_ROCM_ARCH ${PYTORCH_ROCM_ARCH}
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install katex
-ARG KATEX
-COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
-RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG ANACONDA_PYTHON_VERSION
-ARG BUILD_ENVIRONMENT
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
-ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
-COPY requirements-ci.txt /opt/conda/requirements-ci.txt
-COPY ./common/install_conda.sh install_conda.sh
-COPY ./common/common_utils.sh common_utils.sh
-RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt
-
-# Install gcc
-ARG GCC_VERSION
-COPY ./common/install_gcc.sh install_gcc.sh
-RUN bash ./install_gcc.sh && rm install_gcc.sh
-
-# Install clang
-ARG CLANG_VERSION
-COPY ./common/install_clang.sh install_clang.sh
-RUN bash ./install_clang.sh && rm install_clang.sh
-
-# (optional) Install vision packages like OpenCV
-ARG VISION
-COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
-RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
-RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
-ENV INSTALLED_VISION ${VISION}
-
-# Install rocm
-ARG ROCM_VERSION
-RUN mkdir ci_commit_pins
-COPY ./common/common_utils.sh common_utils.sh
-COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
-COPY ./common/install_rocm.sh install_rocm.sh
-RUN bash ./install_rocm.sh
-RUN rm install_rocm.sh common_utils.sh
-RUN rm -r ci_commit_pins
-COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
-RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
-RUN rm install_rocm_magma.sh
-ADD ./common/install_miopen.sh install_miopen.sh
-RUN bash ./install_miopen.sh ${ROCM_VERSION} && rm install_miopen.sh
-ENV ROCM_PATH /opt/rocm
-ENV PATH /opt/rocm/bin:$PATH
-ENV PATH /opt/rocm/hcc/bin:$PATH
-ENV PATH /opt/rocm/hip/bin:$PATH
-ENV PATH /opt/rocm/opencl/bin:$PATH
-ENV PATH /opt/rocm/llvm/bin:$PATH
-ENV MAGMA_HOME /opt/rocm/magma
-ENV LANG C.UTF-8
-ENV LC_ALL C.UTF-8
-
-# Install amdsmi
-COPY ./common/install_amdsmi.sh install_amdsmi.sh
-RUN bash ./install_amdsmi.sh
-RUN rm install_amdsmi.sh
-
-# (optional) Install UCC
-ARG UCX_COMMIT
-ARG UCC_COMMIT
-ENV UCX_COMMIT $UCX_COMMIT
-ENV UCC_COMMIT $UCC_COMMIT
-ENV UCX_HOME /usr
-ENV UCC_HOME /usr
-ADD ./common/install_ucc.sh install_ucc.sh
-RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
-RUN rm install_ucc.sh
-
-COPY ./common/install_openssl.sh install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-RUN bash ./install_openssl.sh
-ENV OPENSSL_DIR /opt/openssl
-
-ARG INDUCTOR_BENCHMARKS
-ARG ANACONDA_PYTHON_VERSION
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
-COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
-COPY ci_commit_pins/timm.txt timm.txt
-COPY ci_commit_pins/torchbench.txt torchbench.txt
-RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
-RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
-
-# (optional) Install non-default Ninja version
-ARG NINJA_VERSION
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
-ARG TRITON
-# Install triton, this needs to be done before sccache because the latter will
-# try to reach out to S3, which docker build runners don't have access to
-COPY ./common/install_triton.sh install_triton.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
-COPY triton_version.txt triton_version.txt
-RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton.txt triton_version.txt
-
-
-# Install ccache/sccache (do this last, so we get priority in PATH)
-COPY ./common/install_cache.sh install_cache.sh
-ENV PATH /opt/cache/bin:$PATH
-RUN bash ./install_cache.sh && rm install_cache.sh
-
-# Install Open MPI for ROCm
-COPY ./common/install_openmpi.sh install_openmpi.sh
-RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
-RUN rm install_openmpi.sh
-
-# Include BUILD_ENVIRONMENT environment variable in image
-ARG BUILD_ENVIRONMENT
-ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
-
-# Install LLVM dev version (Defined in the pytorch/builder github repository)
-COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/ubuntu-xpu/Dockerfile
+++ b/.ci/docker/ubuntu-xpu/Dockerfile
@ -1,105 +0,0 @@
-ARG UBUNTU_VERSION
-
-FROM ubuntu:${UBUNTU_VERSION}
-
-ARG UBUNTU_VERSION
-
-ENV DEBIAN_FRONTEND noninteractive
-
-ARG CLANG_VERSION
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install clang
-ARG LLVMDEV
-COPY ./common/install_clang.sh install_clang.sh
-RUN bash ./install_clang.sh && rm install_clang.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install katex
-ARG KATEX
-COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
-RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG ANACONDA_PYTHON_VERSION
-ARG DOCS
-ARG BUILD_ENVIRONMENT
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
-ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
-ENV DOCS=$DOCS
-COPY requirements-ci.txt requirements-docs.txt /opt/conda/
-COPY ./common/install_conda.sh install_conda.sh
-COPY ./common/common_utils.sh common_utils.sh
-RUN bash ./install_conda.sh && rm install_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
-
-# Install gcc
-ARG GCC_VERSION
-COPY ./common/install_gcc.sh install_gcc.sh
-RUN bash ./install_gcc.sh && rm install_gcc.sh
-
-# Install lcov for C++ code coverage
-COPY ./common/install_lcov.sh install_lcov.sh
-RUN  bash ./install_lcov.sh && rm install_lcov.sh
-
-COPY ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-ENV OPENSSL_DIR /opt/openssl
-RUN rm install_openssl.sh
-
-ARG INDUCTOR_BENCHMARKS
-COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
-COPY ci_commit_pins/timm.txt timm.txt
-RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
-RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt
-
-# Install XPU Dependencies
-ARG XPU_VERSION
-COPY ./common/install_xpu.sh install_xpu.sh
-RUN bash ./install_xpu.sh && rm install_xpu.sh
-
-ARG TRITON
-# Install triton, this needs to be done before sccache because the latter will
-# try to reach out to S3, which docker build runners don't have access to
-COPY ./common/install_triton.sh install_triton.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton-xpu.txt triton-xpu.txt
-COPY triton_xpu_version.txt triton_version.txt
-RUN if [ -n "${TRITON}" ]; then bash ./install_triton.sh; fi
-RUN rm install_triton.sh common_utils.sh triton-xpu.txt triton_version.txt
-
-# (optional) Install vision packages like OpenCV
-ARG VISION
-COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
-RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
-RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
-ENV INSTALLED_VISION ${VISION}
-
-# (optional) Install non-default Ninja version
-ARG NINJA_VERSION
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
-# Install ccache/sccache (do this last, so we get priority in PATH)
-COPY ./common/install_cache.sh install_cache.sh
-ENV PATH /opt/cache/bin:$PATH
-RUN bash ./install_cache.sh && rm install_cache.sh
-
-# Include BUILD_ENVIRONMENT environment variable in image
-ARG BUILD_ENVIRONMENT
-ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
-
-# Install LLVM dev version (Defined in the pytorch/builder github repository)
-COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/docker/ubuntu/Dockerfile
+++ b/.ci/docker/ubuntu/Dockerfile
@ -1,189 +0,0 @@
-ARG UBUNTU_VERSION
-
-FROM ubuntu:${UBUNTU_VERSION} as base
-
-ARG UBUNTU_VERSION
-
-ENV DEBIAN_FRONTEND noninteractive
-
-ARG CLANG_VERSION
-
-# Install common dependencies (so that this step can be cached separately)
-COPY ./common/install_base.sh install_base.sh
-RUN bash ./install_base.sh && rm install_base.sh
-
-# Install clang
-ARG LLVMDEV
-COPY ./common/install_clang.sh install_clang.sh
-RUN bash ./install_clang.sh && rm install_clang.sh
-
-# Install user
-COPY ./common/install_user.sh install_user.sh
-RUN bash ./install_user.sh && rm install_user.sh
-
-# Install katex
-ARG KATEX
-COPY ./common/install_docs_reqs.sh install_docs_reqs.sh
-RUN bash ./install_docs_reqs.sh && rm install_docs_reqs.sh
-
-# Install conda and other packages (e.g., numpy, pytest)
-ARG ANACONDA_PYTHON_VERSION
-ARG DOCS
-ENV ANACONDA_PYTHON_VERSION=$ANACONDA_PYTHON_VERSION
-ENV PATH /opt/conda/envs/py_$ANACONDA_PYTHON_VERSION/bin:/opt/conda/bin:$PATH
-ENV DOCS=$DOCS
-COPY requirements-ci.txt requirements-docs.txt /opt/conda/
-COPY ./common/install_conda.sh install_conda.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ./common/install_magma_conda.sh install_magma_conda.sh
-RUN bash ./install_conda.sh && rm install_conda.sh install_magma_conda.sh common_utils.sh /opt/conda/requirements-ci.txt /opt/conda/requirements-docs.txt
-RUN if [ -n "${UNINSTALL_DILL}" ]; then pip uninstall -y dill; fi
-
-# Install gcc
-ARG GCC_VERSION
-COPY ./common/install_gcc.sh install_gcc.sh
-RUN bash ./install_gcc.sh && rm install_gcc.sh
-
-# Install lcov for C++ code coverage
-COPY ./common/install_lcov.sh install_lcov.sh
-RUN  bash ./install_lcov.sh && rm install_lcov.sh
-
-# Install cuda and cudnn
-ARG CUDA_VERSION
-COPY ./common/install_cuda.sh install_cuda.sh
-COPY ./common/install_nccl.sh install_nccl.sh
-COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
-COPY ./common/install_cusparselt.sh install_cusparselt.sh
-RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
-ENV DESIRED_CUDA ${CUDA_VERSION}
-ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
-# No effect if cuda not installed
-ENV USE_SYSTEM_NCCL=1
-ENV NCCL_INCLUDE_DIR="/usr/local/cuda/include/"
-ENV NCCL_LIB_DIR="/usr/local/cuda/lib64/"
-
-
-# (optional) Install UCC
-ARG UCX_COMMIT
-ARG UCC_COMMIT
-ARG CUDA_VERSION
-ENV UCX_COMMIT $UCX_COMMIT
-ENV UCC_COMMIT $UCC_COMMIT
-ENV UCX_HOME /usr
-ENV UCC_HOME /usr
-ADD ./common/install_ucc.sh install_ucc.sh
-RUN if [ -n "${UCX_COMMIT}" ] && [ -n "${UCC_COMMIT}" ]; then bash ./install_ucc.sh; fi
-RUN rm install_ucc.sh
-
-# (optional) Install vision packages like OpenCV
-ARG VISION
-COPY ./common/install_vision.sh ./common/cache_vision_models.sh ./common/common_utils.sh ./
-RUN if [ -n "${VISION}" ]; then bash ./install_vision.sh; fi
-RUN rm install_vision.sh cache_vision_models.sh common_utils.sh
-ENV INSTALLED_VISION ${VISION}
-
-# (optional) Install non-default Ninja version
-ARG NINJA_VERSION
-COPY ./common/install_ninja.sh install_ninja.sh
-RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi
-RUN rm install_ninja.sh
-
-COPY ./common/install_openssl.sh install_openssl.sh
-RUN bash ./install_openssl.sh
-ENV OPENSSL_ROOT_DIR /opt/openssl
-ENV OPENSSL_DIR /opt/openssl
-RUN rm install_openssl.sh
-
-ARG INDUCTOR_BENCHMARKS
-COPY ./common/install_inductor_benchmark_deps.sh install_inductor_benchmark_deps.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/huggingface-requirements.txt huggingface-requirements.txt
-COPY ci_commit_pins/timm.txt timm.txt
-COPY ci_commit_pins/torchbench.txt torchbench.txt
-RUN if [ -n "${INDUCTOR_BENCHMARKS}" ]; then bash ./install_inductor_benchmark_deps.sh; fi
-RUN rm install_inductor_benchmark_deps.sh common_utils.sh timm.txt huggingface-requirements.txt torchbench.txt
-
-ARG TRITON
-ARG TRITON_CPU
-
-# Create a separate stage for building Triton and Triton-CPU.  install_triton
-# will check for the presence of env vars
-FROM base as triton-builder
-COPY ./common/install_triton.sh install_triton.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/triton.txt triton.txt
-COPY ci_commit_pins/triton-cpu.txt triton-cpu.txt
-RUN bash ./install_triton.sh
-
-FROM base as final
-COPY --from=triton-builder /opt/triton /opt/triton
-RUN if [ -n "${TRITON}" ] || [ -n "${TRITON_CPU}" ]; then pip install /opt/triton/*.whl; chown -R jenkins:jenkins /opt/conda; fi
-RUN rm -rf /opt/triton
-
-ARG EXECUTORCH
-# Build and install executorch
-COPY ./common/install_executorch.sh install_executorch.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/executorch.txt executorch.txt
-RUN if [ -n "${EXECUTORCH}" ]; then bash ./install_executorch.sh; fi
-RUN rm install_executorch.sh common_utils.sh executorch.txt
-
-ARG HALIDE
-# Build and install halide
-COPY ./common/install_halide.sh install_halide.sh
-COPY ./common/common_utils.sh common_utils.sh
-COPY ci_commit_pins/halide.txt halide.txt
-RUN if [ -n "${HALIDE}" ]; then bash ./install_halide.sh; fi
-RUN rm install_halide.sh common_utils.sh halide.txt
-
-ARG ONNX
-# Install ONNX dependencies
-COPY ./common/install_onnx.sh ./common/common_utils.sh ./
-RUN if [ -n "${ONNX}" ]; then bash ./install_onnx.sh; fi
-RUN rm install_onnx.sh common_utils.sh
-
-# (optional) Build ACL
-ARG ACL
-COPY ./common/install_acl.sh install_acl.sh
-RUN if [ -n "${ACL}" ]; then bash ./install_acl.sh; fi
-RUN rm install_acl.sh
-ENV INSTALLED_ACL ${ACL}
-
-ARG OPENBLAS
-COPY ./common/install_openblas.sh install_openblas.sh
-RUN if [ -n "${OPENBLAS}" ]; then bash ./install_openblas.sh; fi
-RUN rm install_openblas.sh
-ENV INSTALLED_OPENBLAS ${OPENBLAS}
-
-# Install ccache/sccache (do this last, so we get priority in PATH)
-ARG SKIP_SCCACHE_INSTALL
-COPY ./common/install_cache.sh install_cache.sh
-ENV PATH /opt/cache/bin:$PATH
-RUN if [ -z "${SKIP_SCCACHE_INSTALL}" ]; then bash ./install_cache.sh; fi
-RUN rm install_cache.sh
-
-# Add jni.h for java host build
-COPY ./common/install_jni.sh install_jni.sh
-COPY ./java/jni.h jni.h
-RUN bash ./install_jni.sh && rm install_jni.sh
-
-# Install Open MPI for CUDA
-COPY ./common/install_openmpi.sh install_openmpi.sh
-RUN if [ -n "${CUDA_VERSION}" ]; then bash install_openmpi.sh; fi
-RUN rm install_openmpi.sh
-
-# Include BUILD_ENVIRONMENT environment variable in image
-ARG BUILD_ENVIRONMENT
-ENV BUILD_ENVIRONMENT ${BUILD_ENVIRONMENT}
-
-# Install LLVM dev version (Defined in the pytorch/builder github repository)
-ARG SKIP_LLVM_SRC_BUILD_INSTALL
-COPY --from=pytorch/llvm:9.0.1 /opt/llvm /opt/llvm
-RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm; fi
-
-# AWS specific CUDA build guidance
-ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
-ENV CUDA_PATH /usr/local/cuda
-
-USER jenkins
-CMD ["bash"]
--- a/.ci/libtorch/build.sh
+++ b/.ci/libtorch/build.sh
@ -1,10 +0,0 @@
-#!/usr/bin/env bash
-
-# This is mostly just a shim to manywheel/build.sh
-# TODO: Make this a dedicated script to build just libtorch
-
-set -ex
-
-SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
-
-USE_NVSHMEM=0 USE_CUSPARSELT=0 BUILD_PYTHONLESS=1 DESIRED_PYTHON="3.10" ${SCRIPTPATH}/../manywheel/build.sh
--- a/.ci/lumen_cli/README.md
+++ b/.ci/lumen_cli/README.md
@ -1,31 +0,0 @@
-# 🔧 Lumen_cli
-A Python CLI tool for building and testing PyTorch-based components, using a YAML configuration file for structured, repeatable workflows.
-
-
-## Features
- **Build**
-    - external projects (e.g. vLLM)
-
-## 📦 Installation
-at the root of the pytorch repo
-```bash
-pip install -e .ci/lumen_cli
-```
-
-## Run the cli tool
-The cli tool must be used at root of pytorch repo, as example to run build external vllm:
-```bash
-python -m cli.run build external vllm
-```
-this will run the build steps with default behaviour for vllm project.
-
-to see help messages, run
-```bash
-python3 -m cli.run --help
-```
-
-## Add customized external build logics
-To add a new external build, for instance, add a new external build logics:
-1. create the build function in cli/lib folder
-2. register your target and the main build function at  EXTERNAL_BUILD_TARGET_DISPATCH in `cli/build_cli/register_build.py`
-3. [optional] create your ci config file in .github/ci_configs/${EXTERNAL_PACKAGE_NAME}.yaml
--- a/.ci/lumen_cli/cli/build_cli/register_build.py
+++ b/.ci/lumen_cli/cli/build_cli/register_build.py
@ -1,37 +0,0 @@
-import argparse
-import logging
-
-from cli.lib.common.cli_helper import register_targets, RichHelp, TargetSpec
-from cli.lib.core.vllm.vllm_build import VllmBuildRunner
-
-
-logger = logging.getLogger(__name__)
-
-# Maps targets to their argparse configuration and runner
-# it adds new target to path python -m cli.run build external {target} with buildrunner
-_TARGETS: dict[str, TargetSpec] = {
-    "vllm": {
-        "runner": VllmBuildRunner,
-        "help": "Build vLLM using docker buildx.",
-    }
-    # add yours ...
-}
-
-
-def register_build_commands(subparsers: argparse._SubParsersAction) -> None:
-    build_parser = subparsers.add_parser(
-        "build",
-        help="Build related commands",
-        formatter_class=RichHelp,
-    )
-    build_subparsers = build_parser.add_subparsers(dest="build_command", required=True)
-    overview = "\n".join(
-        f"  {name:12} {spec.get('help', '')}" for name, spec in _TARGETS.items()
-    )
-    external_parser = build_subparsers.add_parser(
-        "external",
-        help="Build external targets",
-        description="Build third-party targets.\n\nAvailable targets:\n" + overview,
-        formatter_class=RichHelp,
-    )
-    register_targets(external_parser, _TARGETS)
--- a/.ci/lumen_cli/cli/lib/common/cli_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/cli_helper.py
@ -1,71 +0,0 @@
-"""
-Cli Argparser Utility helpers for CLI tasks.
-
-"""
-
-import argparse
-from abc import ABC, abstractmethod
-
-
-try:
-    from typing import Any, Callable, Required, TypedDict  # Python 3.11+
-except ImportError:
-    from typing import Any, Callable, TypedDict
-
-    from typing_extensions import Required  # Fallback for Python <3.11
-
-
-class BaseRunner(ABC):
-    def __init__(self, args: Any) -> None:
-        self.args = args
-
-    @abstractmethod
-    def run(self) -> None:
-        """runs main logics, required"""
-
-
-# Pretty help: keep newlines + show defaults
-class RichHelp(
-    argparse.ArgumentDefaultsHelpFormatter, argparse.RawDescriptionHelpFormatter
-):
-    pass
-
-
-class TargetSpec(TypedDict, total=False):
-    """CLI subcommand specification with bA."""
-
-    runner: Required[type[BaseRunner]]
-    help: str
-    description: str
-    add_arguments: Callable[[argparse.ArgumentParser], None]
-
-
-def register_targets(
-    parser: argparse.ArgumentParser,
-    target_specs: dict[str, TargetSpec],
-    common_args: Callable[[argparse.ArgumentParser], None] = lambda _: None,
-) -> None:
-    """Register target subcommands."""
-    targets = parser.add_subparsers(
-        dest="target",
-        required=True,
-        metavar="{" + ",".join(target_specs.keys()) + "}",
-    )
-
-    for name, spec in target_specs.items():
-        desc = spec.get("description") or spec["runner"].__doc__ or ""
-
-        p = targets.add_parser(
-            name,
-            help=spec.get("help", ""),
-            description=desc.strip(),
-            formatter_class=RichHelp,
-        )
-        p.set_defaults(
-            func=lambda args, cls=spec["runner"]: cls(args).run(),
-            _runner_class=spec["runner"],
-        )
-        if "add_arguments" in spec and callable(spec["add_arguments"]):
-            spec["add_arguments"](p)
-        if common_args:
-            common_args(p)
--- a/.ci/lumen_cli/cli/lib/common/docker_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/docker_helper.py
@ -1,42 +0,0 @@
-"""
-Docker Utility helpers for CLI tasks.
-"""
-
-import logging
-from typing import Optional
-
-import docker
-from docker.errors import APIError, NotFound
-
-
-logger = logging.getLogger(__name__)
-
-# lazy singleton so we don't reconnect every call
-_docker_client: Optional[docker.DockerClient] = None
-
-
-def _get_client() -> docker.DockerClient:
-    global _docker_client
-    if _docker_client is None:
-        _docker_client = docker.from_env()
-    return _docker_client
-
-
-def local_image_exists(
-    image_name: str, client: Optional[docker.DockerClient] = None
-) -> bool:
-    """Return True if a local Docker image exists."""
-    if not image_name:
-        return False
-
-    client = client or _get_client()
-    try:
-        client.images.get(image_name)
-        return True
-    except (NotFound, APIError) as e:
-        logger.error(
-            "Error when checking Docker image '%s': %s",
-            image_name,
-            e.explanation if hasattr(e, "explanation") else str(e),
-        )
-        return False
--- a/.ci/lumen_cli/cli/lib/common/envs_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/envs_helper.py
@ -1,110 +0,0 @@
-"""
-Environment Variables and Dataclasses Utility helpers for CLI tasks.
-"""
-
-import os
-from dataclasses import field, fields, is_dataclass, MISSING
-from pathlib import Path
-from textwrap import indent
-from typing import Optional, Union
-
-from cli.lib.common.utils import str2bool
-
-
-def get_env(name: str, default: str = "") -> str:
-    """Get environment variable with default fallback."""
-    return os.environ.get(name) or default
-
-
-def env_path_optional(
-    name: str,
-    default: Optional[Union[str, Path]] = None,
-    resolve: bool = True,
-) -> Optional[Path]:
-    """Get environment variable as optional Path."""
-    val = get_env(name) or default
-    if not val:
-        return None
-
-    path = Path(val)
-    return path.resolve() if resolve else path
-
-
-def env_path(
-    name: str,
-    default: Optional[Union[str, Path]] = None,
-    resolve: bool = True,
-) -> Path:
-    """Get environment variable as Path, raise if missing."""
-    path = env_path_optional(name, default, resolve)
-    if not path:
-        raise ValueError(f"Missing path value for {name}")
-    return path
-
-
-def env_bool(
-    name: str,
-    default: bool = False,
-) -> bool:
-    val = get_env(name)
-    if not val:
-        return default
-    return str2bool(val)
-
-
-def env_bool_field(
-    name: str,
-    default: bool = False,
-):
-    return field(default_factory=lambda: env_bool(name, default))
-
-
-def env_path_field(
-    name: str,
-    default: Union[str, Path] = "",
-    *,
-    resolve: bool = True,
-) -> Path:
-    return field(default_factory=lambda: env_path(name, default, resolve=resolve))
-
-
-def env_str_field(
-    name: str,
-    default: str = "",
-) -> str:
-    return field(default_factory=lambda: get_env(name, default))
-
-
-def generate_dataclass_help(cls) -> str:
-    """Auto-generate help text for dataclass fields."""
-    if not is_dataclass(cls):
-        raise TypeError(f"{cls} is not a dataclass")
-
-    def get_value(f):
-        if f.default is not MISSING:
-            return f.default
-        if f.default_factory is not MISSING:
-            try:
-                return f.default_factory()
-            except Exception as e:
-                return f"<error: {e}>"
-        return "<required>"
-
-    lines = [f"{f.name:<22} = {repr(get_value(f))}" for f in fields(cls)]
-    return indent("\n".join(lines), "    ")
-
-
-def with_params_help(params_cls: type, title: str = "Parameter defaults"):
-    """
-    Class decorator that appends a help table generated from another dataclass
-    (e.g., VllmParameters) to the decorated class's docstring.
-    """
-    if not is_dataclass(params_cls):
-        raise TypeError(f"{params_cls} must be a dataclass")
-
-    def _decorator(cls: type) -> type:
-        block = generate_dataclass_help(params_cls)
-        cls.__doc__ = (cls.__doc__ or "") + f"\n\n{title}:\n{block}"
-        return cls
-
-    return _decorator
--- a/.ci/lumen_cli/cli/lib/common/gh_summary.py
+++ b/.ci/lumen_cli/cli/lib/common/gh_summary.py
@ -1,143 +0,0 @@
-from __future__ import annotations
-
-import logging
-import os
-import textwrap
-from pathlib import Path
-from typing import TYPE_CHECKING
-
-from cli.lib.common.utils import get_wheels
-from jinja2 import Template
-
-
-if TYPE_CHECKING:
-    from collections.abc import Iterable, Mapping
-
-
-logger = logging.getLogger(__name__)
-
-_TPL_CONTENT = Template(
-    textwrap.dedent("""\
-    ## {{ title }}
-
-    ```{{ lang }}
-    {{ content }}
-    ```
-""")
-)
-
-_TPL_LIST_ITEMS = Template(
-    textwrap.dedent("""\
-    ## {{ title }}
-    {% for it in items %}
-    - {{ it.pkg }}: {{ it.relpath }}
-    {% else %}
-    _(no item found)_
-    {% endfor %}
-    """)
-)
-
-_TPL_TABLE = Template(
-    textwrap.dedent("""\
-    {%- if rows %}
-    | {{ cols | join(' | ') }} |
-    |{%- for _ in cols %} --- |{%- endfor %}
-    {%- for r in rows %}
-    | {%- for c in cols %} {{ r.get(c, "") }} |{%- endfor %}
-    {%- endfor %}
-    {%- else %}
-    _(no data)_
-    {%- endif %}
-""")
-)
-
-
-def gh_summary_path() -> Path | None:
-    """Return the Path to the GitHub step summary file, or None if not set."""
-    p = os.environ.get("GITHUB_STEP_SUMMARY")
-    return Path(p) if p else None
-
-
-def write_gh_step_summary(md: str, *, append_content: bool = True) -> bool:
-    """
-    Write Markdown content to the GitHub Step Summary file if GITHUB_STEP_SUMMARY is set.
-    append_content: default true, if True, append to the end of the file, else overwrite the whole file
-
-    Returns:
-        True if written successfully (in GitHub Actions environment),
-        False if skipped (e.g., running locally where the variable is not set).
-    """
-    sp = gh_summary_path()
-    if not sp:
-        logger.info("[gh-summary] GITHUB_STEP_SUMMARY not set, skipping write.")
-        return False
-
-    md_clean = textwrap.dedent(md).strip() + "\n"
-
-    mode = "a" if append_content else "w"
-    with sp.open(mode, encoding="utf-8") as f:
-        f.write(md_clean)
-    return True
-
-
-def md_heading(text: str, level: int = 2) -> str:
-    """Generate a Markdown heading string with the given level (1-6)."""
-    return f"{'#' * max(1, min(level, 6))} {text}\n"
-
-
-def md_details(summary: str, content: str) -> str:
-    """Generate a collapsible <details> block with a summary and inner content."""
-    return f"<details>\n<summary>{summary}</summary>\n\n{content}\n\n</details>\n"
-
-
-def summarize_content_from_file(
-    output_dir: Path,
-    freeze_file: str,
-    title: str = "Content from file",
-    code_lang: str = "",  # e.g. "text" or "ini"
-) -> bool:
-    f = Path(output_dir) / freeze_file
-    if not f.exists():
-        return False
-    content = f.read_text(encoding="utf-8").strip()
-    md = render_content(content, title=title, lang=code_lang)
-    return write_gh_step_summary(md)
-
-
-def summarize_wheels(path: Path, title: str = "Wheels", max_depth: int = 3):
-    items = get_wheels(path, max_depth=max_depth)
-    if not items:
-        return False
-    md = render_list(items, title=title)
-    return write_gh_step_summary(md)
-
-
-def md_kv_table(rows: Iterable[Mapping[str, str | int | float]]) -> str:
-    """
-    Render a list of dicts as a Markdown table using Jinja template.
-    """
-    rows = list(rows)
-    cols = list({k for r in rows for k in r.keys()})
-    md = _TPL_TABLE.render(cols=cols, rows=rows).strip() + "\n"
-    return md
-
-
-def render_list(
-    items: Iterable[str],
-    *,
-    title: str = "List",
-) -> str:
-    tpl = _TPL_LIST_ITEMS
-    md = tpl.render(title=title, items=items)
-    return md
-
-
-def render_content(
-    content: str,
-    *,
-    title: str = "Content",
-    lang: str = "text",
-) -> str:
-    tpl = _TPL_CONTENT
-    md = tpl.render(title=title, content=content, lang=lang)
-    return md
--- a/.ci/lumen_cli/cli/lib/common/git_helper.py
+++ b/.ci/lumen_cli/cli/lib/common/git_helper.py
@ -1,69 +0,0 @@
-"""
-Git Utility helpers for CLI tasks.
-"""
-
-import logging
-from pathlib import Path
-
-from cli.lib.common.path_helper import remove_dir
-from git import GitCommandError, RemoteProgress, Repo
-
-
-logger = logging.getLogger(__name__)
-
-
-class PrintProgress(RemoteProgress):
-    """Simple progress logger for git operations."""
-
-    def __init__(self, interval: int = 5):
-        super().__init__()
-        self._last_percent = -1
-        self._interval = interval
-
-    def update(self, op_code, cur, max=None, message=""):
-        msg = self._cur_line or message
-        if max and cur:
-            percent = int(cur / max * 100)
-            if percent != self._last_percent and percent % self._interval == 0:
-                self._last_percent = percent
-                logger.info("Progress: %d%% - %s", percent, msg)
-        elif msg:
-            logger.info(msg)
-
-
-def clone_external_repo(target: str, repo: str, dst: str = "", update_submodules=False):
-    """Clone repository with pinned commit and optional submodules."""
-    dst = dst or target
-
-    try:
-        logger.info("Cloning %s to %s", target, dst)
-
-        # Clone and fetch
-        remove_dir(dst)
-        r = Repo.clone_from(repo, dst, progress=PrintProgress())
-        r.git.fetch("--all", "--tags")
-
-        # Checkout pinned commit
-        commit = get_post_build_pinned_commit(target)
-        logger.info("Checking out pinned %s commit %s", target, commit)
-        r.git.checkout(commit)
-
-        # Update submodules if requested
-        if update_submodules and r.submodules:
-            logger.info("Updating %d submodule(s)", len(r.submodules))
-            for sm in r.submodules:
-                sm.update(init=True, recursive=True, progress=PrintProgress())
-
-        logger.info("Successfully cloned %s", target)
-        return r, commit
-
-    except GitCommandError as e:
-        logger.error("Git operation failed: %s", e)
-        raise
-
-
-def get_post_build_pinned_commit(name: str, prefix=".github/ci_commit_pins") -> str:
-    path = Path(prefix) / f"{name}.txt"
-    if not path.exists():
-        raise FileNotFoundError(f"Pin file not found: {path}")
-    return path.read_text(encoding="utf-8").strip()
--- a/Show More
+++ b/Show More
 @ -1 +1 @@
 .5.0
 .2.1