Place .lrodata later in the binary (#117575)

Summary: By default, in LLD 16, .lrodata is placed immediately after .rodata. However, .lrodata can be very large in our compiled models, which leads to relocation out-of-range errors for relative relocations. So we place it after the other sections that are referenced from .text using relative relocations. This is the default behavior in GNU ld. Reviewed By: muchulee8, desertfire, khabinov, chenyang78 Differential Revision: D52557846 Pull Request resolved: https://github.com/pytorch/pytorch/pull/117575 Approved by: https://github.com/chenyang78, https://github.com/khabinov
2025-10-20 12:54:11 +08:00 · 2024-01-18 17:58:18 +00:00
parent 7451dd0585
commit 1b773df3c6
2 changed files with 14 additions and 3 deletions
--- a/torch/_inductor/codecache.py
+++ b/torch/_inductor/codecache.py
@ -65,6 +65,7 @@ from torch.hub import _Faketqdm, tqdm

 _HERE = os.path.abspath(__file__)
 _TORCH_PATH = os.path.dirname(os.path.dirname(_HERE))
+_LINKER_SCRIPT = os.path.join(_TORCH_PATH, "_inductor/script.ld")

 if config.is_fbcode():
    from triton.fb import build_paths
@ -1425,14 +1426,17 @@ def cpp_compile_command(
        if aot_mode and not use_absolute_path:
            inp_name = input
            out_name = output
+            linker_script = _LINKER_SCRIPT
        else:
            # We need to copy any absolute-path torch includes
            inp_name = [os.path.basename(i) for i in input]
            out_name = os.path.basename(output)
+            linker_script = os.path.basename(_LINKER_SCRIPT)
        assert is_clang()
        # Use clang runtime instead of libgcc
        clang_flags += " --rtlib=compiler-rt"
        clang_flags += " -fuse-ld=lld"
+        clang_flags += f" -Wl,--script={linker_script}"
        linker_paths = "-B" + build_paths.glibc_lib()
        linker_paths += " -L" + build_paths.glibc_lib()
    else:
@ -1709,12 +1713,11 @@ def compile_file(
            # When we build remotely, we need to make sure to carefully copy any files
            # that are required during the compilation process into our build directly.
            # This is where all of the ATen/c10/Torch includes come from.
-            torch_includes_path = os.path.join(
-                torch.utils.cpp_extension._TORCH_PATH, "include"
-            )
+            torch_includes_path = os.path.join(_TORCH_PATH, "include")
            with tempfile.TemporaryDirectory() as tmp_dir:
                # Copy everything to tmp compilation folder
                shutil.copy(header_path, os.path.join(tmp_dir, header_name))
+                shutil.copy(_LINKER_SCRIPT, os.path.join(tmp_dir, "script.ld"))
                for p, f in zip(input_paths, input_files):
                    shutil.copy(p, os.path.join(tmp_dir, f))
                dest_include_path = os.path.join(tmp_dir, "include")
--- a/torch/_inductor/script.ld
+++ b/torch/_inductor/script.ld
@ -0,0 +1,8 @@
+SECTIONS {
+  /* By default, in LLD 16, .lrodata is placed immediately after .rodata.
+   * However, .lrodata can be very large in our compiled models, which leads to
+   * relocation out-of-range errors for relative relocations. So we place it
+   * after the other sections that are referenced from .text using relative
+   * relocations. This is the default behavior in GNU ld. */
+  .lrodata : { *(.lrodata) }
+ } INSERT AFTER .bss;