[Build] Add linker script optimization (#121975)

This PR adds a linker script optimization based on prioritized symbols that can be extracted from the profiles of popular workloads. The present linker script was generated to target ARM+CUDA and can be extended later if necessary. The reason we target ARM is given below:

> PyTorch and other applications that access more than 24x 2MB code regions in quick succession can experience performance bottlenecks in the CPU front-end. The link-time optimization improves executable code locality and thereby improves performance. We recommend always turning on the optimization for PyTorch and other applications that behave similarly.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/121975
Approved by: https://github.com/ptrblck, https://github.com/atalman
This commit is contained in:
Aidyn-A
2024-04-09 20:22:20 +00:00
committed by PyTorch MergeBot
parent 178ce1433c
commit a6080f79e9
3 changed files with 1270 additions and 0 deletions

View File

@ -0,0 +1,37 @@
import re
import subprocess
from typing import List
# Marker line that `ld -verbose` prints immediately before and after the
# linker script embedded in its output.
_LD_SCRIPT_DELIMITER = "=================================================="

# Header of the `.text` output section.  The amount of padding between the
# section name and the colon is not fixed, so match any run of whitespace
# instead of a single literal space.
_TEXT_SECTION_RE = re.compile(r"\.text\s+:")


def _read_prioritized_symbols(path: str) -> List[str]:
    """Return the non-blank lines of *path* with surrounding whitespace removed."""
    with open(path, encoding="utf-8") as f:
        stripped_lines = (line.strip() for line in f)
        return [name for name in stripped_lines if name]


def _default_linker_script() -> List[str]:
    """Return the lines of the linker script printed by ``ld -verbose``.

    Only the lines strictly between the first two delimiter lines are kept.
    """
    output = subprocess.check_output(["ld", "-verbose"], text=True)
    lines = output.split("\n")
    markers = [i for i, line in enumerate(lines) if line == _LD_SCRIPT_DELIMITER]
    if len(markers) < 2:
        raise RuntimeError(
            "Could not locate the linker script in the output of `ld -verbose`: "
            f"expected two delimiter lines, found {len(markers)}."
        )
    return lines[markers[0] + 1 : markers[1]]


def gen_linker_script(
    filein: str = "cmake/prioritized_text.txt", fout: str = "cmake/linker_script.ld"
) -> None:
    """Write a linker script that lists prioritized symbols first in ``.text``.

    The script printed by ``ld -verbose`` is copied to *fout* unchanged except
    that a ``*( ... )`` block containing one ``.text.<name>`` entry per name
    read from *filein* is inserted right after the opening brace of the
    ``.text`` output section.

    Args:
        filein: Text file with one symbol name per line.  Blank and
            whitespace-only lines are ignored.
        fout: Path of the linker script to write; overwritten if it exists.

    Raises:
        RuntimeError: If the output of ``ld -verbose`` does not contain a
            delimited linker script, or that script does not contain exactly
            one ``.text`` output section with an opening brace.
        subprocess.CalledProcessError: If ``ld -verbose`` exits with a
            non-zero status.
        OSError: If *filein* cannot be read, *fout* cannot be written, or
            ``ld`` cannot be executed.
    """
    prioritized_text = _read_prioritized_symbols(filein)
    linker_script_lines = _default_linker_script()

    text_section_starts = [
        i
        for i, line in enumerate(linker_script_lines)
        if _TEXT_SECTION_RE.search(line)
    ]
    # Explicit check instead of `assert`: asserts are stripped under `-O`, and
    # the zero-match case deserves an accurate message as well.
    if len(text_section_starts) != 1:
        raise RuntimeError(
            "Expected exactly one .text section in the linker script, "
            f"found {len(text_section_starts)}."
        )
    section_start = text_section_starts[0]

    # The priority block has to go right after the `{` that opens the section
    # body.  Searching for the brace (rather than assuming it sits on the line
    # directly after the header) also covers a `.text : {` single-line header.
    brace_index = next(
        (
            i
            for i in range(section_start, len(linker_script_lines))
            if "{" in linker_script_lines[i]
        ),
        None,
    )
    if brace_index is None:
        raise RuntimeError("The .text section of the linker script has no opening brace.")
    insert_at = brace_index + 1

    priority_block = [" *("]
    priority_block.extend(f" .text.{symbol}" for symbol in prioritized_text)
    priority_block.append(" )")

    output_lines = (
        linker_script_lines[:insert_at]
        + priority_block
        + linker_script_lines[insert_at:]
    )
    with open(fout, "w", encoding="utf-8") as f:
        f.writelines(f"{line}\n" for line in output_lines)