#!/usr/bin/env python3
# encoding: UTF-8

import os
import shutil
from subprocess import check_call, check_output


def list_dir(path: str) -> list[str]:
    """Helper for listing a directory's entries (used to locate built wheels)."""
    return check_output(["ls", "-1", path]).decode().split("\n")


def build_ArmComputeLibrary() -> None:
    """
    Build the Arm Compute Library (ACL) used by aarch64 PyTorch and stage it under /acl.
    """
    print("Building Arm Compute Library")
    acl_build_flags = [
        "debug=0",
        "neon=1",
        "opencl=0",
        "os=linux",
        "openmp=1",
        "cppthreads=0",
        "arch=armv8a",
        "multi_isa=1",
        "fixed_format_kernels=1",
        "build=native",
    ]
    acl_install_dir = "/acl"
    acl_checkout_dir = os.getenv("ACL_SOURCE_DIR", "ComputeLibrary")
    if os.path.isdir(acl_install_dir):
        shutil.rmtree(acl_install_dir)
    if not os.path.isdir(acl_checkout_dir) or not len(os.listdir(acl_checkout_dir)):
        check_call(
            [
                "git",
                "clone",
                "https://github.com/ARM-software/ComputeLibrary.git",
                "-b",
                "v25.02",
                "--depth",
                "1",
                "--shallow-submodules",
            ]
        )

    check_call(
        ["scons", "Werror=1", f"-j{os.cpu_count()}"] + acl_build_flags,
        cwd=acl_checkout_dir,
    )
    for d in ["arm_compute", "include", "utils", "support", "src", "build"]:
        shutil.copytree(f"{acl_checkout_dir}/{d}", f"{acl_install_dir}/{d}")


def replace_tag(filename) -> None:
    """Rewrite the 'Tag:' line of a wheel's WHEEL metadata from linux to manylinux_2_28."""
    with open(filename) as f:
        lines = f.readlines()
    for i, line in enumerate(lines):
        if line.startswith("Tag:"):
            lines[i] = line.replace("-linux_", "-manylinux_2_28_")
            print(f"Updated tag from {line} to {lines[i]}")
            break

    with open(filename, "w") as f:
        f.writelines(lines)


def patch_library_rpath(
    folder: str,
    lib_name: str,
    use_nvidia_pypi_libs: bool = False,
    desired_cuda: str = "",
) -> None:
    """Apply patchelf to set RPATH for a library in torch/lib"""
    lib_path = f"{folder}/tmp/torch/lib/{lib_name}"

    if use_nvidia_pypi_libs:
        # For PyPI NVIDIA libraries, construct CUDA RPATH
        cuda_rpaths = [
            "$ORIGIN/../../nvidia/cudnn/lib",
            "$ORIGIN/../../nvidia/nvshmem/lib",
            "$ORIGIN/../../nvidia/nccl/lib",
            "$ORIGIN/../../nvidia/cusparselt/lib",
        ]
        if "130" in desired_cuda:
            cuda_rpaths.append("$ORIGIN/../../nvidia/cu13/lib")
        else:
            cuda_rpaths.extend(
                [
                    "$ORIGIN/../../nvidia/cublas/lib",
                    "$ORIGIN/../../nvidia/cuda_cupti/lib",
                    "$ORIGIN/../../nvidia/cuda_nvrtc/lib",
                    "$ORIGIN/../../nvidia/cuda_runtime/lib",
                    "$ORIGIN/../../nvidia/cufft/lib",
                    "$ORIGIN/../../nvidia/curand/lib",
                    "$ORIGIN/../../nvidia/cusolver/lib",
                    "$ORIGIN/../../nvidia/cusparse/lib",
                    "$ORIGIN/../../nvidia/nvtx/lib",
                    "$ORIGIN/../../nvidia/cufile/lib",
                ]
            )
        # Add $ORIGIN for local torch libs
        rpath = ":".join(cuda_rpaths) + ":$ORIGIN"
    else:
        # For bundled libraries, just use $ORIGIN
        rpath = "$ORIGIN"

    if os.path.exists(lib_path):
        os.system(
            f"cd {folder}/tmp/torch/lib/; "
            f"patchelf --set-rpath '{rpath}' --force-rpath {lib_name}"
        )


def copy_and_patch_library(
    src_path: str,
    folder: str,
    use_nvidia_pypi_libs: bool = False,
    desired_cuda: str = "",
) -> None:
    """Copy a library to torch/lib and patch its RPATH"""
    if os.path.exists(src_path):
        lib_name = os.path.basename(src_path)
        shutil.copy2(src_path, f"{folder}/tmp/torch/lib/{lib_name}")
        patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda)
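

# Note (illustrative, not executed by this script): the RPATH written by
# patch_library_rpath() can be verified on an unpacked library with patchelf, e.g.
#   patchelf --print-rpath <unpacked-wheel>/torch/lib/libtorch_cuda.so
# For the PyPI NVIDIA layout it should list the "$ORIGIN/../../nvidia/.../lib"
# entries followed by "$ORIGIN"; for the bundled layout it is just "$ORIGIN".
# The path above is hypothetical and depends on where the wheel was unpacked.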
print("Using nvidia libs from pypi - skipping CUDA library bundling") # For PyPI approach, we don't bundle CUDA libraries - they come from PyPI packages # We only need to bundle non-NVIDIA libraries minimal_libs_to_copy = [ "/lib64/libgomp.so.1", "/usr/lib64/libgfortran.so.5", "/acl/build/libarm_compute.so", "/acl/build/libarm_compute_graph.so", "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", "/usr/local/lib/libnvpl_lapack_core.so.0", "/usr/local/lib/libnvpl_blas_core.so.0", ] # Copy minimal libraries to unzipped_folder/torch/lib for lib_path in minimal_libs_to_copy: copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda) # Patch torch libraries used for searching libraries torch_libs_to_patch = [ "libtorch.so", "libtorch_cpu.so", "libtorch_cuda.so", "libtorch_cuda_linalg.so", "libtorch_global_deps.so", "libtorch_python.so", "libtorch_nvshmem.so", "libc10.so", "libc10_cuda.so", "libcaffe2_nvrtc.so", "libshm.so", ] for lib_name in torch_libs_to_patch: patch_library_rpath(folder, lib_name, use_nvidia_pypi_libs, desired_cuda) else: print("Bundling CUDA libraries with wheel") # Original logic for bundling system CUDA libraries # Common libraries for all CUDA versions common_libs = [ # Non-NVIDIA system libraries "/lib64/libgomp.so.1", "/usr/lib64/libgfortran.so.5", "/acl/build/libarm_compute.so", "/acl/build/libarm_compute_graph.so", # Common CUDA libraries (same for all versions) "/usr/local/lib/libnvpl_lapack_lp64_gomp.so.0", "/usr/local/lib/libnvpl_blas_lp64_gomp.so.0", "/usr/local/lib/libnvpl_lapack_core.so.0", "/usr/local/lib/libnvpl_blas_core.so.0", "/usr/local/cuda/extras/CUPTI/lib64/libnvperf_host.so", "/usr/local/cuda/lib64/libcudnn.so.9", "/usr/local/cuda/lib64/libcusparseLt.so.0", "/usr/local/cuda/lib64/libcurand.so.10", "/usr/local/cuda/lib64/libnccl.so.2", "/usr/local/cuda/lib64/libnvshmem_host.so.3", "/usr/local/cuda/lib64/libcudnn_adv.so.9", "/usr/local/cuda/lib64/libcudnn_cnn.so.9", "/usr/local/cuda/lib64/libcudnn_graph.so.9", "/usr/local/cuda/lib64/libcudnn_ops.so.9", "/usr/local/cuda/lib64/libcudnn_engines_runtime_compiled.so.9", "/usr/local/cuda/lib64/libcudnn_engines_precompiled.so.9", "/usr/local/cuda/lib64/libcudnn_heuristic.so.9", "/usr/local/cuda/lib64/libcufile.so.0", "/usr/local/cuda/lib64/libcufile_rdma.so.1", "/usr/local/cuda/lib64/libcusparse.so.12", ] # CUDA version-specific libraries if "13" in desired_cuda: minor_version = desired_cuda[-1] version_specific_libs = [ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.13", "/usr/local/cuda/lib64/libcublas.so.13", "/usr/local/cuda/lib64/libcublasLt.so.13", "/usr/local/cuda/lib64/libcudart.so.13", "/usr/local/cuda/lib64/libcufft.so.12", "/usr/local/cuda/lib64/libcusolver.so.12", "/usr/local/cuda/lib64/libnvJitLink.so.13", "/usr/local/cuda/lib64/libnvrtc.so.13", f"/usr/local/cuda/lib64/libnvrtc-builtins.so.13.{minor_version}", ] elif "12" in desired_cuda: # Get the last character for libnvrtc-builtins version (e.g., "129" -> "9") minor_version = desired_cuda[-1] version_specific_libs = [ "/usr/local/cuda/extras/CUPTI/lib64/libcupti.so.12", "/usr/local/cuda/lib64/libcublas.so.12", "/usr/local/cuda/lib64/libcublasLt.so.12", "/usr/local/cuda/lib64/libcudart.so.12", "/usr/local/cuda/lib64/libcufft.so.11", "/usr/local/cuda/lib64/libcusolver.so.11", "/usr/local/cuda/lib64/libnvJitLink.so.12", "/usr/local/cuda/lib64/libnvrtc.so.12", f"/usr/local/cuda/lib64/libnvrtc-builtins.so.12.{minor_version}", ] else: raise ValueError(f"Unsupported CUDA version: 
{desired_cuda}.") # Combine all libraries libs_to_copy = common_libs + version_specific_libs # Copy libraries to unzipped_folder/torch/lib for lib_path in libs_to_copy: copy_and_patch_library(lib_path, folder, use_nvidia_pypi_libs, desired_cuda) # Make sure the wheel is tagged with manylinux_2_28 for f in os.scandir(f"{folder}/tmp/"): if f.is_dir() and f.name.endswith(".dist-info"): replace_tag(f"{f.path}/WHEEL") break os.system(f"wheel pack {folder}/tmp/ -d {folder}") os.system(f"rm -rf {folder}/tmp/") def complete_wheel(folder: str) -> str: """ Complete wheel build and put in artifact location """ wheel_name = list_dir(f"/{folder}/dist")[0] # Please note for cuda we don't run auditwheel since we use custom script to package # the cuda dependencies to the wheel file using update_wheel() method. # However we need to make sure filename reflects the correct Manylinux platform. if "pytorch" in folder and not enable_cuda: print("Repairing Wheel with AuditWheel") check_call(["auditwheel", "repair", f"dist/{wheel_name}"], cwd=folder) repaired_wheel_name = list_dir(f"/{folder}/wheelhouse")[0] print(f"Moving {repaired_wheel_name} wheel to /{folder}/dist") os.rename( f"/{folder}/wheelhouse/{repaired_wheel_name}", f"/{folder}/dist/{repaired_wheel_name}", ) else: repaired_wheel_name = list_dir(f"/{folder}/dist")[0] print(f"Copying {repaired_wheel_name} to artifacts") shutil.copy2( f"/{folder}/dist/{repaired_wheel_name}", f"/artifacts/{repaired_wheel_name}" ) return repaired_wheel_name def parse_arguments(): """ Parse inline arguments """ from argparse import ArgumentParser parser = ArgumentParser("AARCH64 wheels python CD") parser.add_argument("--debug", action="store_true") parser.add_argument("--build-only", action="store_true") parser.add_argument("--test-only", type=str) parser.add_argument("--enable-mkldnn", action="store_true") parser.add_argument("--enable-cuda", action="store_true") return parser.parse_args() if __name__ == "__main__": """ Entry Point """ args = parse_arguments() enable_mkldnn = args.enable_mkldnn enable_cuda = args.enable_cuda branch = check_output( ["git", "rev-parse", "--abbrev-ref", "HEAD"], cwd="/pytorch" ).decode() print("Building PyTorch wheel") build_vars = "" # MAX_JOB=5 is not required for CPU backend (see commit 465d98b) if enable_cuda: build_vars += "MAX_JOBS=5 " # Handle PyPI NVIDIA libraries vs bundled libraries use_nvidia_pypi_libs = os.getenv("USE_NVIDIA_PYPI_LIBS", "0") == "1" if use_nvidia_pypi_libs: print("Configuring build for PyPI NVIDIA libraries") # Configure for dynamic linking (matching x86 logic) build_vars += "ATEN_STATIC_CUDA=0 USE_CUDA_STATIC_LINK=0 USE_CUPTI_SO=1 " else: print("Configuring build for bundled NVIDIA libraries") # Keep existing static linking approach - already configured above override_package_version = os.getenv("OVERRIDE_PACKAGE_VERSION") desired_cuda = os.getenv("DESIRED_CUDA") if override_package_version is not None: version = override_package_version build_vars += ( f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version} PYTORCH_BUILD_NUMBER=1 " ) elif branch in ["nightly", "main"]: build_date = ( check_output(["git", "log", "--pretty=format:%cs", "-1"], cwd="/pytorch") .decode() .replace("-", "") ) version = ( check_output(["cat", "version.txt"], cwd="/pytorch").decode().strip()[:-2] ) if enable_cuda: build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date}+{desired_cuda} PYTORCH_BUILD_NUMBER=1 " else: build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={version}.dev{build_date} PYTORCH_BUILD_NUMBER=1 " elif 
branch.startswith(("v1.", "v2.")): build_vars += f"BUILD_TEST=0 PYTORCH_BUILD_VERSION={branch[1 : branch.find('-')]} PYTORCH_BUILD_NUMBER=1 " if enable_mkldnn: build_ArmComputeLibrary() print("build pytorch with mkldnn+acl backend") build_vars += ( "USE_MKLDNN=ON USE_MKLDNN_ACL=ON " "ACL_ROOT_DIR=/acl " "LD_LIBRARY_PATH=/pytorch/build/lib:/acl/build:$LD_LIBRARY_PATH " "ACL_INCLUDE_DIR=/acl/build " "ACL_LIBRARY=/acl/build " ) if enable_cuda: build_vars += "BLAS=NVPL " else: build_vars += "BLAS=OpenBLAS OpenBLAS_HOME=/OpenBLAS " else: print("build pytorch without mkldnn backend") os.system(f"cd /pytorch; {build_vars} python3 -m build --wheel --no-isolation") if enable_cuda: print("Updating Cuda Dependency") filename = os.listdir("/pytorch/dist/") wheel_path = f"/pytorch/dist/{filename[0]}" package_cuda_wheel(wheel_path, desired_cuda) pytorch_wheel_name = complete_wheel("/pytorch/") print(f"Build Complete. Created {pytorch_wheel_name}..")