mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Move prioritized text linker optimization code from setup.py to cmake (#160078)
Note: This is a replica of PR #155901, which will be closed. I had to create a new PR in order to add it to my ghstack, as there are some later commits which depend on it. ### Summary 🚀 This PR moves the prioritized text linker optimization from setup.py to cmake (and enables it by default on Linux aarch64 systems). This change consolidates what was previously manual CI logic into a single location (cmake), ensuring consistent behavior across local builds, CI pipelines, and developer environments. ### Motivation Prioritized text layout has measurable performance benefits on Arm systems by reducing code padding and improving cache utilization. This optimization was previously triggered manually via CI scripts (.ci/aarch64_linux/aarch64_ci_build.sh) or user-set environment variables. By detecting the target architecture within cmake, this change enables the optimization automatically where applicable, improving maintainability and usability. Note: Due to ninja/cmake graph generation issues, we cannot apply the linker file globally to all targets, so the targets must be manually defined. See CMakeLists.txt: the main libraries (torch_python, torch, torch_cpu, torch_cuda, torch_xpu) have been targeted, which should be enough to maintain the performance benefits outlined above. Co-authored-by: Usamah Zaheer <usamah.zaheer@arm.com> Pull Request resolved: https://github.com/pytorch/pytorch/pull/160078 Approved by: https://github.com/seemethere
This commit is contained in:
committed by
PyTorch MergeBot
parent
be8095b07f
commit
26b3ae5890
24
setup.py
24
setup.py
@ -227,9 +227,6 @@
|
||||
# Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
|
||||
# By default, it is only enabled on Windows.
|
||||
#
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD
|
||||
# Uses prioritized text from cmake/prioritized_text.txt for LD
|
||||
#
|
||||
# BUILD_LIBTORCH_WHL
|
||||
# Builds libtorch.so and its dependencies as a wheel
|
||||
#
|
||||
@ -323,7 +320,6 @@ from tools.setup_helpers.env import (
|
||||
IS_LINUX,
|
||||
IS_WINDOWS,
|
||||
)
|
||||
from tools.setup_helpers.generate_linker_script import gen_linker_script
|
||||
|
||||
|
||||
def str2bool(value: str | None) -> bool:
|
||||
@ -1627,26 +1623,6 @@ def main() -> None:
|
||||
if BUILD_PYTHON_ONLY:
|
||||
install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"]
|
||||
|
||||
if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")):
|
||||
gen_linker_script(
|
||||
filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
|
||||
)
|
||||
linker_script_path = os.path.abspath("cmake/linker_script.ld")
|
||||
os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
|
||||
os.environ["CFLAGS"] = (
|
||||
os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
os.environ["CXXFLAGS"] = (
|
||||
os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
elif platform.system() == "Linux" and platform.processor() == "aarch64":
|
||||
print_box(
|
||||
"""
|
||||
WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
|
||||
To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
|
||||
"""
|
||||
)
|
||||
|
||||
# Parse the command line and check the arguments before we proceed with
|
||||
# building deps and setup. We need to set values so `--help` works.
|
||||
dist = Distribution()
|
||||
|
Reference in New Issue
Block a user