mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[Build] Add linker script optimization (#121975)
This PR adds a linker script optimization based on prioritized symbols that can be extracted from the profiles of popular workloads. The present linker script was generated to target ARM+CUDA and can later be extended if necessary. The reason we target ARM is shown below: > PyTorch and other applications that access more than 24x 2MB code regions in quick succession can result in performance bottlenecks in the CPU front-end. The link-time optimization improves executable code locality and improves performance. We recommend always turning on the optimization for PyTorch and other applications that behave similarly. Pull Request resolved: https://github.com/pytorch/pytorch/pull/121975 Approved by: https://github.com/ptrblck, https://github.com/atalman
This commit is contained in:
committed by
PyTorch MergeBot
parent
178ce1433c
commit
a6080f79e9
1204
cmake/prioritized_text.txt
Normal file
1204
cmake/prioritized_text.txt
Normal file
File diff suppressed because it is too large
Load Diff
29
setup.py
29
setup.py
@ -223,6 +223,9 @@
|
||||
# USE_MIMALLOC
|
||||
# Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
|
||||
# By default, it is only enabled on Windows.
|
||||
#
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD
|
||||
# Uses prioritized text from cmake/prioritized_text.txt for LD
|
||||
|
||||
import sys
|
||||
|
||||
@ -263,6 +266,7 @@ from tools.build_pytorch_libs import build_caffe2
|
||||
from tools.generate_torch_version import get_torch_version
|
||||
from tools.setup_helpers.cmake import CMake
|
||||
from tools.setup_helpers.env import build_type, IS_DARWIN, IS_LINUX, IS_WINDOWS
|
||||
from tools.setup_helpers.generate_linker_script import gen_linker_script
|
||||
|
||||
################################################################################
|
||||
# Parameters parsed from environment
|
||||
@ -1114,6 +1118,31 @@ def main():
|
||||
'mkl>=2021.1.1,<=2021.4.0; platform_system == "Windows"',
|
||||
]
|
||||
|
||||
use_prioritized_text = str(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD", ""))
|
||||
if (
|
||||
use_prioritized_text == ""
|
||||
and platform.system() == "Linux"
|
||||
and platform.processor() == "aarch64"
|
||||
):
|
||||
print_box(
|
||||
"""
|
||||
WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
|
||||
To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
|
||||
"""
|
||||
)
|
||||
if use_prioritized_text == "1" or use_prioritized_text == "True":
|
||||
gen_linker_script(
|
||||
filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
|
||||
)
|
||||
linker_script_path = os.path.abspath("cmake/linker_script.ld")
|
||||
os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
|
||||
os.environ["CFLAGS"] = (
|
||||
os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
os.environ["CXXFLAGS"] = (
|
||||
os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
|
||||
# Parse the command line and check the arguments before we proceed with
|
||||
# building deps and setup. We need to set values so `--help` works.
|
||||
dist = Distribution()
|
||||
|
37
tools/setup_helpers/generate_linker_script.py
Normal file
37
tools/setup_helpers/generate_linker_script.py
Normal file
@ -0,0 +1,37 @@
|
||||
import subprocess
|
||||
|
||||
|
||||
def gen_linker_script(
    filein: str = "cmake/prioritized_text.txt", fout: str = "cmake/linker_script.ld"
) -> None:
    """Write a linker script that lists prioritized ``.text.*`` sections.

    The base script is taken from the output of ``ld -verbose``: the lines
    between the first two separator lines made of ``=`` characters. One
    ``.text.<name>`` entry is generated per non-blank line of ``filein`` and
    the whole group is inserted two lines below the single ``.text :`` line
    of the base script. The result is written to ``fout``.

    Args:
        filein: Text file with one prioritized name per line. Blank lines
            are ignored and surrounding whitespace is stripped.
        fout: Path of the linker script to write (overwritten if present).

    Raises:
        RuntimeError: If the ``ld -verbose`` output does not contain two
            separator lines, or the extracted script does not contain
            exactly one ``.text :`` line.
        subprocess.CalledProcessError: If ``ld -verbose`` exits with a
            non-zero status.
        OSError: If ``filein`` cannot be read, ``ld`` cannot be started, or
            ``fout`` cannot be written.
    """
    with open(filein) as f:
        # Strip every line so trailing whitespace or "\r\n" line endings
        # never leak into a section name, then drop lines that end up empty.
        stripped_lines = (line.strip() for line in f)
        prioritized_text = [name for name in stripped_lines if name]

    separator = "=================================================="
    verbose_lines = subprocess.check_output(["ld", "-verbose"], text=True).split(
        "\n"
    )

    # The script itself sits between the first two separator lines.
    indices = [i for i, x in enumerate(verbose_lines) if x == separator]
    if len(indices) < 2:
        raise RuntimeError(
            "Could not locate the default linker script in the output of "
            f"`ld -verbose`: expected two separator lines, found {len(indices)}."
        )
    linker_script_lines = verbose_lines[indices[0] + 1 : indices[1]]

    text_line_start = [
        i for i, line in enumerate(linker_script_lines) if ".text :" in line
    ]
    # Explicit raise instead of `assert`: the check must survive `python -O`
    # and the message must also be correct when no `.text :` line exists.
    if len(text_line_start) != 1:
        raise RuntimeError(
            "Expected exactly one '.text :' line in the default linker "
            f"script, found {len(text_line_start)}."
        )

    # NOTE(review): the +2 offset presumably skips the `.text :` line and a
    # following line holding the opening brace - confirm against the output
    # of the `ld` in use.
    insert_at = text_line_start[0] + 2

    with open(fout, "w") as f:
        for lineid, line in enumerate(linker_script_lines):
            # With nothing to prioritize the base script is emitted
            # unchanged rather than with an empty `*( )` group.
            if lineid == insert_at and prioritized_text:
                f.write(" *(\n")
                f.writelines(f" .text.{name}\n" for name in prioritized_text)
                f.write(" )\n")
            f.write(f"{line}\n")
|
Reference in New Issue
Block a user