Revert "[BE]: Reduce binary size 40% using aggressive fatbin compression. (#157791)"

This reverts commit 9bdf87e8918b9a3f78d7bcb8a770c19f7c82ac15.

Reverted https://github.com/pytorch/pytorch/pull/157791 on behalf of https://github.com/albanD to avoid regressing on supported driver versions ([comment](https://github.com/pytorch/pytorch/pull/157791#issuecomment-3058091176))
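For context: -compress-mode=size asks nvcc/fatbinary for its most aggressive fatbin compression, and fatbins compressed this way can only be decompressed by sufficiently new CUDA drivers, which is the driver-support regression cited above. A minimal sketch of the two flag sets, assuming a toy kernel.cu (illustrative only, not PyTorch's actual build invocation):

    # Flags from the reverted PR: smallest binaries, but loading the
    # compressed fatbin requires a newer CUDA driver at runtime
    nvcc -Xfatbin -compress-all -compress-mode=size -o kernel kernel.cu

    # Flags restored by this revert: default compression, loadable by older drivers
    nvcc -Xfatbin -compress-all -o kernel kernel.cu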
Author: PyTorch MergeBot
Date:   2025-07-10 16:14:06 +00:00
Parent: 4781d72faa
Commit: 493bd625e2
2 changed files with 8 additions and 6 deletions


@@ -181,7 +181,7 @@ RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm;
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
-ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all -compress-mode=size"
+ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
 ENV CUDA_PATH /usr/local/cuda
 USER jenkins


@@ -4,7 +4,7 @@ set -ex
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-export TORCH_NVCC_FLAGS="-Xfatbin -compress-all -compress-mode=size"
+export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
 export NCCL_ROOT_DIR=/usr/local/cuda
 export TH_BINARY_BUILD=1
 export USE_STATIC_CUDNN=1
@@ -57,14 +57,16 @@ case ${CUDA_VERSION} in
     #removing sm_50-sm_60 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases
     #however we would like to keep sm_70 architecture see: https://github.com/pytorch/pytorch/issues/157517
     12.8)
-        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0"
         ;;
     12.9)
         TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        # WAR to resolve the ld error in libtorch build with CUDA 12.9
+        if [[ "$PACKAGE_TYPE" == "libtorch" ]]; then
+            TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
+        fi
         ;;
     12.6)
-        # CUDA 12.6 seems to have a bug which prevents aggressive compression here
-        export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --compress-mode=default"
         TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
         ;;
     *)
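A note on the "+PTX" suffix toggled above: an entry like "9.0" makes the build emit SASS for that exact architecture only, while "12.0+PTX" also embeds PTX so the binary can be JIT-compiled on newer GPUs, at the cost of a larger fatbin (presumably the reverted PR could afford the extra PTX for 12.8 because of the smaller compressed binaries, and this revert drops it again). Roughly, the arch list expands to nvcc -gencode flags like the following sketch (illustrative, not the exact flags PyTorch's build system emits):

    # "9.0"      -> SASS for sm_90 only
    nvcc -gencode arch=compute_90,code=sm_90 ...
    # "12.0+PTX" -> SASS for sm_120 plus embedded PTX for forward compatibility
    nvcc -gencode arch=compute_120,code=sm_120 \
         -gencode arch=compute_120,code=compute_120 ...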
@@ -112,7 +114,7 @@ DEPS_SONAME=(
 if [[ $CUDA_VERSION == 12* ]]; then
     export USE_STATIC_CUDNN=0
     # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --threads 2"
+    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
         echo "Bundling with cudnn and cublas."
         DEPS_LIST+=(
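On the --threads flag kept in both versions of the line above: nvcc --threads N (available since CUDA 11.2) compiles for multiple target architectures in parallel host threads, which matters with an arch list as long as the TORCH_CUDA_ARCH_LIST values in this script. A standalone sketch, assuming a toy kernel.cu:

    # Build SASS for two architectures concurrently using two host threads
    nvcc --threads 2 \
        -gencode arch=compute_90,code=sm_90 \
        -gencode arch=compute_120,code=sm_120 \
        -Xfatbin -compress-all \
        -o kernel kernel.cu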