[Build] Add linker script optimization (#121975)

This PR adds a linker script optimization based on prioritized symbols that can be extracted from the profiles of popular workloads. The present linker script was generated to target ARM+CUDA and can later be extended if necessary. The reason we target ARM is shown below: > PyTorch and other applications that access more than 24x 2MB code regions in quick succession can result in performance bottlenecks in the CPU front-end. The link-time optimization improves executable code locality and improves performance. We recommend always turning on the optimization for PyTorch and other applications that behave similarly. Pull Request resolved: https://github.com/pytorch/pytorch/pull/121975 Approved by: https://github.com/ptrblck, https://github.com/atalman
2025-10-20 21:14:14 +08:00 · 2024-04-09 20:22:20 +00:00
parent 178ce1433c
commit a6080f79e9
3 changed files with 1270 additions and 0 deletions
--- a/cmake/prioritized_text.txt
+++ b/cmake/prioritized_text.txt
--- a/setup.py
+++ b/setup.py
@ -223,6 +223,9 @@
 #   USE_MIMALLOC
 #      Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
 #      By default, It is only enabled on Windows.
+#
+#   USE_PRIORITIZED_TEXT_FOR_LD
+#      Uses prioritized text from cmake/prioritized_text.txt for LD

 import sys

@ -263,6 +266,7 @@ from tools.build_pytorch_libs import build_caffe2
 from tools.generate_torch_version import get_torch_version
 from tools.setup_helpers.cmake import CMake
 from tools.setup_helpers.env import build_type, IS_DARWIN, IS_LINUX, IS_WINDOWS
+from tools.setup_helpers.generate_linker_script import gen_linker_script

 ################################################################################
 # Parameters parsed from environment
@ -1114,6 +1118,31 @@ def main():
        'mkl>=2021.1.1,<=2021.4.0; platform_system == "Windows"',
    ]

+    use_prioritized_text = str(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD", ""))
+    if (
+        use_prioritized_text == ""
+        and platform.system() == "Linux"
+        and platform.processor() == "aarch64"
+    ):
+        print_box(
+            """
+            WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
+            To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
+            """
+        )
+    if use_prioritized_text == "1" or use_prioritized_text == "True":
+        gen_linker_script(
+            filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
+        )
+        linker_script_path = os.path.abspath("cmake/linker_script.ld")
+        os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
+        os.environ["CFLAGS"] = (
+            os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
+        )
+        os.environ["CXXFLAGS"] = (
+            os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
+        )
+
    # Parse the command line and check the arguments before we proceed with
    # building deps and setup. We need to set values so `--help` works.
    dist = Distribution()
--- a/tools/setup_helpers/generate_linker_script.py
+++ b/tools/setup_helpers/generate_linker_script.py
@ -0,0 +1,37 @@
+import subprocess
+
+
+def gen_linker_script(
+    filein: str = "cmake/prioritized_text.txt", fout: str = "cmake/linker_script.ld"
+) -> None:
+    """Write a linker script that lists prioritized symbols first in ``.text``.
+
+    The default linker script is taken from the output of ``ld -verbose``
+    (the part between the two ``====`` separator lines). A ``*( ... )`` input
+    section stanza containing one ``.text.<symbol>`` pattern per entry of
+    ``filein`` is inserted right after the opening brace of the ``.text``
+    output section, and the result is written to ``fout``.
+
+    Args:
+        filein: Path to a text file with one symbol name per line. Blank and
+            whitespace-only lines are ignored.
+        fout: Path of the linker script to write (overwritten if it exists).
+
+    Raises:
+        OSError: If ``filein`` cannot be read, ``fout`` cannot be written, or
+            the ``ld`` executable cannot be started.
+        subprocess.CalledProcessError: If ``ld -verbose`` exits with a
+            non-zero status.
+        RuntimeError: If the ``ld -verbose`` output does not contain a
+            delimited default script with exactly one ``.text`` output section.
+    """
+    with open(filein, encoding="utf-8") as f:
+        # strip() also drops a trailing "\r" and stray spaces, so CRLF files
+        # and whitespace-only lines cannot produce malformed patterns.
+        prioritized_text = [line.strip() for line in f if line.strip()]
+
+    ld_output_lines = subprocess.check_output(
+        ["ld", "-verbose"], text=True
+    ).splitlines()
+
+    # The default script is printed between two separator lines.
+    separator = "=================================================="
+    indices = [i for i, x in enumerate(ld_output_lines) if x == separator]
+    if len(indices) < 2:
+        raise RuntimeError(
+            "Could not find the default linker script in the output of "
+            "`ld -verbose`; expected two '=====' separator lines but found "
+            f"{len(indices)}."
+        )
+    linker_script_lines = ld_output_lines[indices[0] + 1 : indices[1]]
+
+    # Match the `.text :` output-section header regardless of how much
+    # whitespace separates the section name from the colon.
+    text_headers = [
+        i
+        for i, line in enumerate(linker_script_lines)
+        if line.split()[:2] == [".text", ":"]
+    ]
+    if len(text_headers) != 1:
+        raise RuntimeError(
+            "Expected exactly one .text output section in the default linker "
+            f"script, found {len(text_headers)}."
+        )
+
+    # Insert right after the section's opening brace instead of assuming it
+    # always sits alone on the line following the header.
+    brace_line = next(
+        (
+            i
+            for i in range(text_headers[0], len(linker_script_lines))
+            if "{" in linker_script_lines[i]
+        ),
+        None,
+    )
+    if brace_line is None:
+        raise RuntimeError(
+            "Could not find the opening brace of the .text output section."
+        )
+    insert_at = brace_line + 1
+
+    # With no prioritized symbols the default script is written unchanged
+    # rather than with an empty `*( )` stanza.
+    stanza = []
+    if prioritized_text:
+        stanza = [
+            "    *(",
+            *(f"      .text.{symbol}" for symbol in prioritized_text),
+            "    )",
+        ]
+
+    output_lines = (
+        linker_script_lines[:insert_at] + stanza + linker_script_lines[insert_at:]
+    )
+    with open(fout, "w", encoding="utf-8") as f:
+        f.writelines(f"{line}\n" for line in output_lines)