Files
vllm/tools/install_nixl_from_source_ubuntu.py

225 lines
7.9 KiB
Python

# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
# install_prerequisites.py
import argparse
import glob
import os
import subprocess
import sys
# --- Configuration ---
WHEELS_CACHE_HOME = os.environ.get("WHEELS_CACHE_HOME", "/tmp/wheels_cache")
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
UCX_DIR = os.path.join("/tmp", "ucx_source")
NIXL_DIR = os.path.join("/tmp", "nixl_source")
UCX_INSTALL_DIR = os.path.join("/tmp", "ucx_install")
UCX_REPO_URL = "https://github.com/openucx/ucx.git"
NIXL_REPO_URL = "https://github.com/ai-dynamo/nixl.git"
# --- Helper Functions ---
def run_command(command, cwd=".", env=None):
"""Helper function to run a shell command and check for errors."""
print(f"--> Running command: {' '.join(command)} in '{cwd}'", flush=True)
subprocess.check_call(command, cwd=cwd, env=env)
def is_pip_package_installed(package_name):
"""Checks if a package is installed via pip without raising an exception."""
result = subprocess.run(
[sys.executable, "-m", "pip", "show", package_name],
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
)
return result.returncode == 0
def find_nixl_wheel_in_cache(cache_dir):
"""Finds a nixl wheel file in the specified cache directory."""
# The repaired wheel will have a 'manylinux' tag, but this glob still works.
search_pattern = os.path.join(cache_dir, "nixl-*.whl")
wheels = glob.glob(search_pattern)
if wheels:
# Sort to get the most recent/highest version if multiple exist
wheels.sort()
return wheels[-1]
return None
def install_system_dependencies():
"""Installs required system packages using apt-get if run as root."""
if os.geteuid() != 0:
print("\n---", flush=True)
print(
"WARNING: Not running as root. \
Skipping system dependency installation.",
flush=True,
)
print(
"Please ensure the listed packages are installed on your system:",
flush=True,
)
print(
" patchelf build-essential git cmake ninja-build \
autotools-dev automake meson libtool libtool-bin",
flush=True,
)
print("---\n", flush=True)
return
print("--- Running as root. Installing system dependencies... ---", flush=True)
apt_packages = [
"patchelf", # <-- Add patchelf here
"build-essential",
"git",
"cmake",
"ninja-build",
"autotools-dev",
"automake",
"meson",
"libtool",
"libtool-bin",
]
run_command(["apt-get", "update"])
run_command(["apt-get", "install", "-y"] + apt_packages)
print("--- System dependencies installed successfully. ---\n", flush=True)
def build_and_install_prerequisites(args):
"""Builds UCX and NIXL from source, creating a self-contained wheel."""
if not args.force_reinstall and is_pip_package_installed("nixl"):
print("--> NIXL is already installed. Nothing to do.", flush=True)
return
cached_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME)
if not args.force_reinstall and cached_wheel:
print(
f"\n--> Found self-contained wheel: \
{os.path.basename(cached_wheel)}.",
flush=True,
)
print("--> Installing from cache, skipping all source builds.", flush=True)
install_command = [sys.executable, "-m", "pip", "install", cached_wheel]
run_command(install_command)
print("\n--- Installation from cache complete. ---", flush=True)
return
print(
"\n--> No installed package or cached wheel found. \
Starting full build process...",
flush=True,
)
print("\n--> Installing auditwheel...", flush=True)
run_command([sys.executable, "-m", "pip", "install", "auditwheel"])
install_system_dependencies()
ucx_install_path = os.path.abspath(UCX_INSTALL_DIR)
print(f"--> Using wheel cache directory: {WHEELS_CACHE_HOME}", flush=True)
os.makedirs(WHEELS_CACHE_HOME, exist_ok=True)
# -- Step 1: Build UCX from source --
print("\n[1/3] Configuring and building UCX from source...", flush=True)
if not os.path.exists(UCX_DIR):
run_command(["git", "clone", UCX_REPO_URL, UCX_DIR])
ucx_source_path = os.path.abspath(UCX_DIR)
run_command(["git", "checkout", "v1.19.x"], cwd=ucx_source_path)
run_command(["./autogen.sh"], cwd=ucx_source_path)
configure_command = [
"./configure",
f"--prefix={ucx_install_path}",
"--enable-shared",
"--disable-static",
"--disable-doxygen-doc",
"--enable-optimizations",
"--enable-cma",
"--enable-devel-headers",
"--with-verbs",
"--enable-mt",
]
run_command(configure_command, cwd=ucx_source_path)
run_command(["make", "-j", str(os.cpu_count() or 1)], cwd=ucx_source_path)
run_command(["make", "install"], cwd=ucx_source_path)
print("--- UCX build and install complete ---", flush=True)
# -- Step 2: Build NIXL wheel from source --
print("\n[2/3] Building NIXL wheel from source...", flush=True)
if not os.path.exists(NIXL_DIR):
run_command(["git", "clone", NIXL_REPO_URL, NIXL_DIR])
build_env = os.environ.copy()
build_env["PKG_CONFIG_PATH"] = os.path.join(ucx_install_path, "lib", "pkgconfig")
ucx_lib_path = os.path.join(ucx_install_path, "lib")
ucx_plugin_path = os.path.join(ucx_lib_path, "ucx")
existing_ld_path = os.environ.get("LD_LIBRARY_PATH", "")
build_env["LD_LIBRARY_PATH"] = (
f"{ucx_lib_path}:{ucx_plugin_path}:{existing_ld_path}".strip(":")
)
print(f"--> Using LD_LIBRARY_PATH: {build_env['LD_LIBRARY_PATH']}", flush=True)
temp_wheel_dir = os.path.join(ROOT_DIR, "temp_wheelhouse")
run_command(
[
sys.executable,
"-m",
"pip",
"wheel",
".",
"--no-deps",
f"--wheel-dir={temp_wheel_dir}",
],
cwd=os.path.abspath(NIXL_DIR),
env=build_env,
)
# -- Step 3: Repair the wheel by copying UCX libraries --
print("\n[3/3] Repairing NIXL wheel to include UCX libraries...", flush=True)
unrepaired_wheel = find_nixl_wheel_in_cache(temp_wheel_dir)
if not unrepaired_wheel:
raise RuntimeError("Failed to find the NIXL wheel after building it.")
# We tell auditwheel to ignore the plugin that mesonpy already handled.
auditwheel_command = [
"auditwheel",
"repair",
"--exclude",
"libplugin_UCX.so", # <-- Exclude because mesonpy already includes it
unrepaired_wheel,
f"--wheel-dir={WHEELS_CACHE_HOME}",
]
run_command(auditwheel_command, env=build_env)
# --- CLEANUP ---
# No more temporary files to remove, just the temp wheelhouse
run_command(["rm", "-rf", temp_wheel_dir])
# --- END CLEANUP ---
newly_built_wheel = find_nixl_wheel_in_cache(WHEELS_CACHE_HOME)
if not newly_built_wheel:
raise RuntimeError("Failed to find the repaired NIXL wheel.")
print(
f"--> Successfully built self-contained wheel: \
{os.path.basename(newly_built_wheel)}. Now installing...",
flush=True,
)
install_command = [sys.executable, "-m", "pip", "install", newly_built_wheel]
if args.force_reinstall:
install_command.insert(-1, "--force-reinstall")
run_command(install_command)
print("--- NIXL installation complete ---", flush=True)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Build and install UCX and NIXL dependencies."
)
parser.add_argument(
"--force-reinstall",
action="store_true",
help="Force rebuild and reinstall of UCX and NIXL \
even if they are already installed.",
)
args = parser.parse_args()
build_and_install_prerequisites(args)