Revert "Move prioritized text linker optimization code from setup.py to cmake (#160078)"

This reverts commit 26b3ae58908becbb03b28636f7384d2972a8c9a5. Reverted https://github.com/pytorch/pytorch/pull/160078 on behalf of https://github.com/atalman due to: "Sorry, reverting this; it broke the Linux aarch64 CUDA nightlies" — see [pytorch/pytorch/actions/runs/17637486681/job/50146967503](https://github.com/pytorch/pytorch/actions/runs/17637486681/job/50146967503) ([comment](https://github.com/pytorch/pytorch/pull/160078#issuecomment-3281426631))
2025-10-20 21:14:14 +08:00 · 2025-09-11 15:29:29 +00:00
parent 9f783e172d
commit 94db2ad51d
8 changed files with 29 additions and 116 deletions
--- a/.ci/aarch64_linux/aarch64_ci_build.sh
+++ b/.ci/aarch64_linux/aarch64_ci_build.sh
@ -31,7 +31,8 @@ pip install -r /pytorch/requirements.txt
 pip install auditwheel==6.2.0 wheel
 if [ "$DESIRED_CUDA" = "cpu" ]; then
    echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
-    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
+    # USE_PRIORITIZED_TEXT_FOR_LD enables the linker script optimization; see https://github.com/pytorch/pytorch/pull/121975/files
+    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
 else
    echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
    export USE_SYSTEM_NCCL=1
@ -45,5 +46,6 @@ else
        export USE_NVIDIA_PYPI_LIBS=1
    fi

-    python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
+    # USE_PRIORITIZED_TEXT_FOR_LD enables the linker script optimization; see https://github.com/pytorch/pytorch/pull/121975/files
+    USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
 fi
--- a/.ci/aarch64_linux/aarch64_wheel_ci_build.py
+++ b/.ci/aarch64_linux/aarch64_wheel_ci_build.py
@ -317,7 +317,7 @@ if __name__ == "__main__":
    ).decode()

    print("Building PyTorch wheel")
-    build_vars = ""
+    build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
    # MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
    if enable_cuda:
        build_vars += "MAX_JOBS=5 "
--- a/.gitignore
+++ b/.gitignore
@ -259,9 +259,6 @@ gen
 .pytest_cache
 aten/build/*

-# Linker scripts for prioritized text optimization
-cmake/linker_script.ld
-
 # Bram
 plsdontbreak

--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -379,13 +379,6 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
                       OFF "USE_CUDA" OFF)
 cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
                        "CPU_AARCH64" OFF)
-# prioritized text linker, ON by default for AArch64+Linux, option visible to all AArch64, x86 and ppc64le.
-set(USE_PRIORITIZED_TEXT_DEFAULT OFF)
-if(LINUX AND CPU_AARCH64)
-  set(USE_PRIORITIZED_TEXT_DEFAULT ON)
-endif()
-cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld."
-  "${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)

 option(USE_MIMALLOC "Use mimalloc" OFF)
 # Enable third party mimalloc library to improve memory allocation performance
@ -664,11 +657,6 @@ endif(MSVC)

 string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

-# Set linker max-page-size to 64KiB on AArch64 Linux
-if(LINUX AND CPU_AARCH64)
-  add_link_options_if_supported("-z,max-page-size=0x10000")
-endif()
-
 # Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
 # applicable to mobile are disabled by this variable. Setting
 # `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
@ -1433,57 +1421,3 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
  install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
          DESTINATION "${CMAKE_INSTALL_BINDIR}")
 endif()
-
-if(USE_PRIORITIZED_TEXT_FOR_LD)
-  add_compile_options(
-    $<$<COMPILE_LANGUAGE:C,CXX>:-ffunction-sections>
-    $<$<COMPILE_LANGUAGE:C,CXX>:-fdata-sections>
-  )
-  set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld")
-  set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt")
-
-  add_custom_command(
-    OUTPUT "${LINKER_SCRIPT_FILE_OUT}"
-    COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}"
-    DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}"
-    COMMENT "Generating prioritized text linker files"
-    VERBATIM
-  )
-
-  add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
-
-  if(BUILD_PYTHON)
-    set(LINKER_OPT_TARGETS torch_python)
-  endif()
-
-  if(NOT BUILD_LIBTORCHLESS)
-    list(APPEND LINKER_OPT_TARGETS torch_cpu c10)
-    if(USE_CUDA)
-      list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda)
-    endif()
-    if(USE_XPU)
-      list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu)
-    endif()
-    if(USE_ROCM)
-      list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip)
-    endif()
-  endif()
-
-  foreach(tgt IN LISTS LINKER_OPT_TARGETS)
-    if(TARGET ${tgt})
-      add_dependencies("${tgt}" generate_linker_script)
-      target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}")
-      set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
-    else()
-       message(WARNING "Requested target '${tgt}' for linker script optimization was not found.")
-    endif()
-  endforeach()
-
-else()
-  if(LINUX AND CPU_AARCH64)
-    message(WARNING [[
-    It is strongly recommend to enable linker script optimization for all AArch64 Linux builds.
-    To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
-    ]])
-  endif()
-endif()
--- a/cmake/Summary.cmake
+++ b/cmake/Summary.cmake
@ -158,7 +158,6 @@ function(caffe2_print_configuration_summary)
  if(${USE_KLEIDIAI})
    message(STATUS "  USE_KLEIDIAI          : ${USE_KLEIDIAI}")
  endif()
-  message(STATUS "  USE_PRIORITIZED_TEXT_FOR_LD : ${USE_PRIORITIZED_TEXT_FOR_LD}")
  message(STATUS "  USE_UCC               : ${USE_UCC}")
  if(${USE_UCC})
    message(STATUS "    USE_SYSTEM_UCC        : ${USE_SYSTEM_UCC}")
--- a/cmake/public/utils.cmake
+++ b/cmake/public/utils.cmake
@ -482,7 +482,6 @@ function(torch_update_find_cuda_flags)
 endfunction()

 include(CheckCXXCompilerFlag)
-include(CheckLinkerFlag)

 ##############################################################################
 # CHeck if given flag is supported and append it to provided outputvar
@ -512,22 +511,3 @@ function(target_compile_options_if_supported target flag)
    target_compile_options(${target} PRIVATE ${flag})
  endif()
 endfunction()
-
-# Check if a global link option is supported
-function(add_link_options_if_supported flag)
-  check_linker_flag(C "LINKER:${flag}" _supported)
-  if("${_supported}")
-    add_link_options("LINKER:${flag}")
-  else()
-    message(WARNING "Attempted to use unsupported link option : ${flag}.")
-  endif()
-endfunction()
-
-function(target_link_options_if_supported tgt flag)
-  check_linker_flag(C "LINKER:${flag}" _supported)
-  if("${_supported}")
-    target_link_options("${tgt}" PRIVATE "LINKER:${flag}")
-  else()
-    message(WARNING "Attempted to use unsupported link option : ${flag}.")
-  endif()
-endfunction()
--- a/setup.py
+++ b/setup.py
@ -227,6 +227,9 @@
 #      Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
 #      By default, It is only enabled on Windows.
 #
+#   USE_PRIORITIZED_TEXT_FOR_LD
+#      Generates a linker script from cmake/prioritized_text.txt and passes it to LD
+#
 #   BUILD_LIBTORCH_WHL
 #      Builds libtorch.so and its dependencies as a wheel
 #
@ -320,6 +323,7 @@ from tools.setup_helpers.env import (
    IS_LINUX,
    IS_WINDOWS,
 )
+from tools.setup_helpers.generate_linker_script import gen_linker_script


 def str2bool(value: str | None) -> bool:
@ -1623,6 +1627,26 @@ def main() -> None:
    if BUILD_PYTHON_ONLY:
        install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"]

+    if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")):
+        gen_linker_script(
+            filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
+        )
+        linker_script_path = os.path.abspath("cmake/linker_script.ld")
+        os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
+        os.environ["CFLAGS"] = (
+            os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
+        )
+        os.environ["CXXFLAGS"] = (
+            os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
+        )
+    elif platform.system() == "Linux" and platform.processor() == "aarch64":
+        print_box(
+            """
+            WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
+            To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
+            """
+        )
+
    # Parse the command line and check the arguments before we proceed with
    # building deps and setup. We need to set values so `--help` works.
    dist = Distribution()
--- a/tools/setup_helpers/generate_linker_script.py
+++ b/tools/setup_helpers/generate_linker_script.py
@ -1,7 +1,5 @@
-import argparse
 import os
 import subprocess
-from pathlib import Path


 def gen_linker_script(
@ -30,10 +28,6 @@ def gen_linker_script(
    assert len(text_line_start) == 1, "The linker script has multiple text sections!"
    text_line_start = text_line_start[0]

-    # ensure that parent directory exists before writing
-    fout = Path(fout)
-    fout.parent.mkdir(parents=True, exist_ok=True)
-
    with open(fout, "w") as f:
        for lineid, line in enumerate(linker_script_lines):
            if lineid == text_line_start + 2:
@ -42,20 +36,3 @@ def gen_linker_script(
                    f.write(f"      .text.{plines}\n")
                f.write("    )\n")
            f.write(f"{line}\n")
-
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(
-        description="Generate linker file based on prioritized symbols. Used for link-time optimization.",
-    )
-    parser.add_argument(
-        "--filein",
-        help="Path to prioritized_text.txt input file",
-        default=argparse.SUPPRESS,
-    )
-    parser.add_argument(
-        "--fout", help="Output path for linker ld file", default=argparse.SUPPRESS
-    )
-    # convert args to a dict to pass to gen_linker_script
-    kwargs = vars(parser.parse_args())
-    gen_linker_script(**kwargs)