Place .lrodata later in the binary (#117575)

Summary:
By default, in LLD 16, .lrodata is placed immediately after .rodata.
However, .lrodata can be very large in our compiled models, which leads to
relocation out-of-range errors for relative relocations. So we place it
after the other sections that are referenced from .text using relative
relocations. This is the default behavior in GNU ld.
Reviewed By: muchulee8, desertfire, khabinov, chenyang78

Differential Revision: D52557846

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117575
Approved by: https://github.com/chenyang78, https://github.com/khabinov
This commit is contained in:
Jez Ng
2024-01-18 17:58:18 +00:00
committed by PyTorch MergeBot
parent 7451dd0585
commit 1b773df3c6
2 changed files with 14 additions and 3 deletions

View File

@ -65,6 +65,7 @@ from torch.hub import _Faketqdm, tqdm
_HERE = os.path.abspath(__file__)
_TORCH_PATH = os.path.dirname(os.path.dirname(_HERE))
_LINKER_SCRIPT = os.path.join(_TORCH_PATH, "_inductor/script.ld")
if config.is_fbcode():
from triton.fb import build_paths
@ -1425,14 +1426,17 @@ def cpp_compile_command(
if aot_mode and not use_absolute_path:
inp_name = input
out_name = output
linker_script = _LINKER_SCRIPT
else:
# We need to copy any absolute-path torch includes
inp_name = [os.path.basename(i) for i in input]
out_name = os.path.basename(output)
linker_script = os.path.basename(_LINKER_SCRIPT)
assert is_clang()
# Use clang runtime instead of libgcc
clang_flags += " --rtlib=compiler-rt"
clang_flags += " -fuse-ld=lld"
clang_flags += f" -Wl,--script={linker_script}"
linker_paths = "-B" + build_paths.glibc_lib()
linker_paths += " -L" + build_paths.glibc_lib()
else:
@ -1709,12 +1713,11 @@ def compile_file(
# When we build remotely, we need to make sure to carefully copy any files
# that are required during the compilation process into our build directory.
# This is where all of the ATen/c10/Torch includes come from.
torch_includes_path = os.path.join(
torch.utils.cpp_extension._TORCH_PATH, "include"
)
torch_includes_path = os.path.join(_TORCH_PATH, "include")
with tempfile.TemporaryDirectory() as tmp_dir:
# Copy everything to tmp compilation folder
shutil.copy(header_path, os.path.join(tmp_dir, header_name))
shutil.copy(_LINKER_SCRIPT, os.path.join(tmp_dir, "script.ld"))
for p, f in zip(input_paths, input_files):
shutil.copy(p, os.path.join(tmp_dir, f))
dest_include_path = os.path.join(tmp_dir, "include")

View File

@ -0,0 +1,8 @@
SECTIONS {
/* By default, in LLD 16, .lrodata is placed immediately after .rodata.
* However, .lrodata can be very large in our compiled models, which leads to
* relocation out-of-range errors for relative relocations. So we place it
* after the other sections that are referenced from .text using relative
* relocations. This is the default behavior in GNU ld. */
/* Gather the .lrodata input sections from all input files into one
* .lrodata output section. */
.lrodata : { *(.lrodata) }
/* The INSERT AFTER clause below positions that output section after .bss. */
} INSERT AFTER .bss;