Revert "[BE]: Reduce binary size 40% using aggressive fatbin compression. (#157791)"

This reverts commit 9bdf87e8918b9a3f78d7bcb8a770c19f7c82ac15.

Reverted https://github.com/pytorch/pytorch/pull/157791 on behalf of https://github.com/albanD to avoid regressing on supported driver versions ([comment](https://github.com/pytorch/pytorch/pull/157791#issuecomment-3058091176))
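For context: -compress-mode=size asks nvcc/fatbinary for its most aggressive fatbin compression, and fatbins compressed this way can only be decompressed by sufficiently new CUDA drivers, which is the driver-support regression cited above. A minimal sketch of the two flag sets, assuming a toy kernel.cu (illustrative only, not PyTorch's actual build invocation):

    # Flags from the reverted PR: smallest binaries, but loading the
    # compressed fatbin requires a newer CUDA driver at runtime
    nvcc -Xfatbin -compress-all -compress-mode=size -o kernel kernel.cu

    # Flags restored by this revert: default compression, loadable by older drivers
    nvcc -Xfatbin -compress-all -o kernel kernel.cu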
Author: PyTorch MergeBot
Date:   2025-07-10 16:14:06 +00:00
Parent: 4781d72faa
Commit: 493bd625e2
2 changed files with 8 additions and 6 deletions


@@ -181,7 +181,7 @@ RUN if [ -n "${SKIP_LLVM_SRC_BUILD_INSTALL}" ]; then set -eu; rm -rf /opt/llvm;
 # AWS specific CUDA build guidance
 ENV TORCH_CUDA_ARCH_LIST Maxwell
-ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all -compress-mode=size"
+ENV TORCH_NVCC_FLAGS "-Xfatbin -compress-all"
 ENV CUDA_PATH /usr/local/cuda
 USER jenkins


@@ -4,7 +4,7 @@ set -ex
 SCRIPTPATH="$( cd "$(dirname "$0")" ; pwd -P )"
-export TORCH_NVCC_FLAGS="-Xfatbin -compress-all -compress-mode=size"
+export TORCH_NVCC_FLAGS="-Xfatbin -compress-all"
 export NCCL_ROOT_DIR=/usr/local/cuda
 export TH_BINARY_BUILD=1
 export USE_STATIC_CUDNN=1
@@ -57,14 +57,16 @@ case ${CUDA_VERSION} in
     #removing sm_50-sm_60 as these architectures are deprecated in CUDA 12.8/9 and will be removed in future releases
     #however we would like to keep sm_70 architecture see: https://github.com/pytorch/pytorch/issues/157517
     12.8)
-        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0"
         ;;
     12.9)
         TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;9.0;10.0;12.0+PTX"
+        # WAR to resolve the ld error in libtorch build with CUDA 12.9
+        if [[ "$PACKAGE_TYPE" == "libtorch" ]]; then
+            TORCH_CUDA_ARCH_LIST="7.5;8.0;9.0;10.0;12.0+PTX"
+        fi
         ;;
     12.6)
-        # CUDA 12.6 seems to have a bug which prevents aggressive compression here
-        export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --compress-mode=default"
         TORCH_CUDA_ARCH_LIST="5.0;6.0;7.0;7.5;8.0;8.6;9.0"
         ;;
     *)
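A note on the "+PTX" suffix toggled above: an entry like "9.0" makes the build emit SASS for that exact architecture only, while "12.0+PTX" also embeds PTX so the binary can be JIT-compiled on newer GPUs, at the cost of a larger fatbin (presumably the reverted PR could afford the extra PTX for 12.8 because of the smaller compressed binaries, and this revert drops it again). Roughly, the arch list expands to nvcc -gencode flags like the following sketch (illustrative, not the exact flags PyTorch's build system emits):

    # "9.0"      -> SASS for sm_90 only
    nvcc -gencode arch=compute_90,code=sm_90 ...
    # "12.0+PTX" -> SASS for sm_120 plus embedded PTX for forward compatibility
    nvcc -gencode arch=compute_120,code=sm_120 \
         -gencode arch=compute_120,code=compute_120 ...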
@@ -112,7 +114,7 @@ DEPS_SONAME=(
 if [[ $CUDA_VERSION == 12* ]]; then
     export USE_STATIC_CUDNN=0
     # Try parallelizing nvcc as well
-    export TORCH_NVCC_FLAGS="${TORCH_NVCC_FLAGS} --threads 2"
+    export TORCH_NVCC_FLAGS="-Xfatbin -compress-all --threads 2"
     if [[ -z "$PYTORCH_EXTRA_INSTALL_REQUIREMENTS" ]]; then
         echo "Bundling with cudnn and cublas."
         DEPS_LIST+=(
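On the --threads flag kept in both versions of the line above: nvcc --threads N (available since CUDA 11.2) compiles for multiple target architectures in parallel host threads, which matters with an arch list as long as the TORCH_CUDA_ARCH_LIST values in this script. A standalone sketch, assuming a toy kernel.cu:

    # Build SASS for two architectures concurrently using two host threads
    nvcc --threads 2 \
        -gencode arch=compute_90,code=sm_90 \
        -gencode arch=compute_120,code=sm_120 \
        -Xfatbin -compress-all \
        -o kernel kernel.cu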