mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
Revert "Move prioritized text linker optimization code from setup.py to cmake (#160078)"
This reverts commit 26b3ae58908becbb03b28636f7384d2972a8c9a5. Reverted https://github.com/pytorch/pytorch/pull/160078 on behalf of https://github.com/atalman due to: "Sorry, reverting; this broke Linux aarch64 CUDA nightlies." See [pytorch/pytorch/actions/runs/17637486681/job/50146967503](https://github.com/pytorch/pytorch/actions/runs/17637486681/job/50146967503) ([comment](https://github.com/pytorch/pytorch/pull/160078#issuecomment-3281426631))
This commit is contained in:
@ -31,7 +31,8 @@ pip install -r /pytorch/requirements.txt
|
||||
pip install auditwheel==6.2.0 wheel
|
||||
if [ "$DESIRED_CUDA" = "cpu" ]; then
|
||||
echo "BASE_CUDA_VERSION is not set. Building cpu wheel."
|
||||
python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD enables the linker script optimization; see https://github.com/pytorch/pytorch/pull/121975/files
|
||||
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn
|
||||
else
|
||||
echo "BASE_CUDA_VERSION is set to: $DESIRED_CUDA"
|
||||
export USE_SYSTEM_NCCL=1
|
||||
@ -45,5 +46,6 @@ else
|
||||
export USE_NVIDIA_PYPI_LIBS=1
|
||||
fi
|
||||
|
||||
python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD enables the linker script optimization; see https://github.com/pytorch/pytorch/pull/121975/files
|
||||
USE_PRIORITIZED_TEXT_FOR_LD=1 python /pytorch/.ci/aarch64_linux/aarch64_wheel_ci_build.py --enable-mkldnn --enable-cuda
|
||||
fi
|
||||
|
@ -317,7 +317,7 @@ if __name__ == "__main__":
|
||||
).decode()
|
||||
|
||||
print("Building PyTorch wheel")
|
||||
build_vars = ""
|
||||
build_vars = "CMAKE_SHARED_LINKER_FLAGS=-Wl,-z,max-page-size=0x10000 "
|
||||
# MAX_JOB=5 is not required for CPU backend (see commit 465d98b)
|
||||
if enable_cuda:
|
||||
build_vars += "MAX_JOBS=5 "
|
||||
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -259,9 +259,6 @@ gen
|
||||
.pytest_cache
|
||||
aten/build/*
|
||||
|
||||
# Linker scripts for prioritized text optimization
|
||||
cmake/linker_script.ld
|
||||
|
||||
# Bram
|
||||
plsdontbreak
|
||||
|
||||
|
@ -379,13 +379,6 @@ cmake_dependent_option(BUILD_BUNDLE_PTXAS "Bundle PTX into torch/bin fodler"
|
||||
OFF "USE_CUDA" OFF)
|
||||
cmake_dependent_option(USE_KLEIDIAI "Use KleidiAI for the ARM CPU & AARCH64 architecture." ON
|
||||
"CPU_AARCH64" OFF)
|
||||
# prioritized text linker, ON by default for AArch64+Linux, option visible to all AArch64, x86 and ppc64le.
|
||||
set(USE_PRIORITIZED_TEXT_DEFAULT OFF)
|
||||
if(LINUX AND CPU_AARCH64)
|
||||
set(USE_PRIORITIZED_TEXT_DEFAULT ON)
|
||||
endif()
|
||||
cmake_dependent_option(USE_PRIORITIZED_TEXT_FOR_LD "Use prioritized text linker for ld."
|
||||
"${USE_PRIORITIZED_TEXT_DEFAULT}" "CPU_INTEL OR CPU_AARCH64 OR CPU_POWER" OFF)
|
||||
|
||||
option(USE_MIMALLOC "Use mimalloc" OFF)
|
||||
# Enable third party mimalloc library to improve memory allocation performance
|
||||
@ -664,11 +657,6 @@ endif(MSVC)
|
||||
|
||||
string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")
|
||||
|
||||
# Set linker max-page-size to 64KiB on AArch64 Linux
|
||||
if(LINUX AND CPU_AARCH64)
|
||||
add_link_options_if_supported("-z,max-page-size=0x10000")
|
||||
endif()
|
||||
|
||||
# Set INTERN_BUILD_MOBILE for all mobile builds. Components that are not
|
||||
# applicable to mobile are disabled by this variable. Setting
|
||||
# `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable can force it
|
||||
@ -1433,57 +1421,3 @@ if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
|
||||
install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
|
||||
DESTINATION "${CMAKE_INSTALL_BINDIR}")
|
||||
endif()
|
||||
|
||||
if(USE_PRIORITIZED_TEXT_FOR_LD)
|
||||
add_compile_options(
|
||||
$<$<COMPILE_LANGUAGE:C,CXX>:-ffunction-sections>
|
||||
$<$<COMPILE_LANGUAGE:C,CXX>:-fdata-sections>
|
||||
)
|
||||
set(LINKER_SCRIPT_FILE_OUT "${CMAKE_SOURCE_DIR}/cmake/linker_script.ld")
|
||||
set(LINKER_SCRIPT_FILE_IN "${CMAKE_SOURCE_DIR}/cmake/prioritized_text.txt")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT "${LINKER_SCRIPT_FILE_OUT}"
|
||||
COMMAND ${Python_EXECUTABLE} ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py --filein "${LINKER_SCRIPT_FILE_IN}" --fout "${LINKER_SCRIPT_FILE_OUT}"
|
||||
DEPENDS ${CMAKE_SOURCE_DIR}/tools/setup_helpers/generate_linker_script.py "${LINKER_SCRIPT_FILE_IN}"
|
||||
COMMENT "Generating prioritized text linker files"
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
add_custom_target(generate_linker_script DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
|
||||
|
||||
if(BUILD_PYTHON)
|
||||
set(LINKER_OPT_TARGETS torch_python)
|
||||
endif()
|
||||
|
||||
if(NOT BUILD_LIBTORCHLESS)
|
||||
list(APPEND LINKER_OPT_TARGETS torch_cpu c10)
|
||||
if(USE_CUDA)
|
||||
list(APPEND LINKER_OPT_TARGETS torch_cuda c10_cuda)
|
||||
endif()
|
||||
if(USE_XPU)
|
||||
list(APPEND LINKER_OPT_TARGETS torch_xpu c10_xpu)
|
||||
endif()
|
||||
if(USE_ROCM)
|
||||
list(APPEND LINKER_OPT_TARGETS torch_hip c10_hip)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
foreach(tgt IN LISTS LINKER_OPT_TARGETS)
|
||||
if(TARGET ${tgt})
|
||||
add_dependencies("${tgt}" generate_linker_script)
|
||||
target_link_options_if_supported(${tgt} "-T,${LINKER_SCRIPT_FILE_OUT}")
|
||||
set_property(TARGET ${tgt} APPEND PROPERTY LINK_DEPENDS "${LINKER_SCRIPT_FILE_OUT}")
|
||||
else()
|
||||
message(WARNING "Requested target '${tgt}' for linker script optimization was not found.")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
else()
|
||||
if(LINUX AND CPU_AARCH64)
|
||||
message(WARNING [[
|
||||
It is strongly recommend to enable linker script optimization for all AArch64 Linux builds.
|
||||
To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
|
||||
]])
|
||||
endif()
|
||||
endif()
|
@ -158,7 +158,6 @@ function(caffe2_print_configuration_summary)
|
||||
if(${USE_KLEIDIAI})
|
||||
message(STATUS " USE_KLEIDIAI : ${USE_KLEIDIAI}")
|
||||
endif()
|
||||
message(STATUS " USE_PRIORITIZED_TEXT_FOR_LD : ${USE_PRIORITIZED_TEXT_FOR_LD}")
|
||||
message(STATUS " USE_UCC : ${USE_UCC}")
|
||||
if(${USE_UCC})
|
||||
message(STATUS " USE_SYSTEM_UCC : ${USE_SYSTEM_UCC}")
|
||||
|
@ -482,7 +482,6 @@ function(torch_update_find_cuda_flags)
|
||||
endfunction()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
include(CheckLinkerFlag)
|
||||
|
||||
##############################################################################
|
||||
# Check if the given flag is supported and append it to the provided outputvar
|
||||
@ -512,22 +511,3 @@ function(target_compile_options_if_supported target flag)
|
||||
target_compile_options(${target} PRIVATE ${flag})
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
# Check if a global link option is supported
|
||||
function(add_link_options_if_supported flag)
|
||||
check_linker_flag(C "LINKER:${flag}" _supported)
|
||||
if("${_supported}")
|
||||
add_link_options("LINKER:${flag}")
|
||||
else()
|
||||
message(WARNING "Attempted to use unsupported link option : ${flag}.")
|
||||
endif()
|
||||
endfunction()
|
||||
|
||||
function(target_link_options_if_supported tgt flag)
|
||||
check_linker_flag(C "LINKER:${flag}" _supported)
|
||||
if("${_supported}")
|
||||
target_link_options("${tgt}" PRIVATE "LINKER:${flag}")
|
||||
else()
|
||||
message(WARNING "Attempted to use unsupported link option : ${flag}.")
|
||||
endif()
|
||||
endfunction()
|
24
setup.py
24
setup.py
@ -227,6 +227,9 @@
|
||||
# Static link mimalloc into C10, and use mimalloc in alloc_cpu & alloc_free.
|
||||
# By default, It is only enabled on Windows.
|
||||
#
|
||||
# USE_PRIORITIZED_TEXT_FOR_LD
|
||||
# Uses prioritized text from cmake/prioritized_text.txt for LD
|
||||
#
|
||||
# BUILD_LIBTORCH_WHL
|
||||
# Builds libtorch.so and its dependencies as a wheel
|
||||
#
|
||||
@ -320,6 +323,7 @@ from tools.setup_helpers.env import (
|
||||
IS_LINUX,
|
||||
IS_WINDOWS,
|
||||
)
|
||||
from tools.setup_helpers.generate_linker_script import gen_linker_script
|
||||
|
||||
|
||||
def str2bool(value: str | None) -> bool:
|
||||
@ -1623,6 +1627,26 @@ def main() -> None:
|
||||
if BUILD_PYTHON_ONLY:
|
||||
install_requires += [f"{LIBTORCH_PKG_NAME}=={TORCH_VERSION}"]
|
||||
|
||||
if str2bool(os.getenv("USE_PRIORITIZED_TEXT_FOR_LD")):
|
||||
gen_linker_script(
|
||||
filein="cmake/prioritized_text.txt", fout="cmake/linker_script.ld"
|
||||
)
|
||||
linker_script_path = os.path.abspath("cmake/linker_script.ld")
|
||||
os.environ["LDFLAGS"] = os.getenv("LDFLAGS", "") + f" -T{linker_script_path}"
|
||||
os.environ["CFLAGS"] = (
|
||||
os.getenv("CFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
os.environ["CXXFLAGS"] = (
|
||||
os.getenv("CXXFLAGS", "") + " -ffunction-sections -fdata-sections"
|
||||
)
|
||||
elif platform.system() == "Linux" and platform.processor() == "aarch64":
|
||||
print_box(
|
||||
"""
|
||||
WARNING: we strongly recommend enabling linker script optimization for ARM + CUDA.
|
||||
To do so please export USE_PRIORITIZED_TEXT_FOR_LD=1
|
||||
"""
|
||||
)
|
||||
|
||||
# Parse the command line and check the arguments before we proceed with
|
||||
# building deps and setup. We need to set values so `--help` works.
|
||||
dist = Distribution()
|
||||
|
@ -1,7 +1,5 @@
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def gen_linker_script(
|
||||
@ -30,10 +28,6 @@ def gen_linker_script(
|
||||
assert len(text_line_start) == 1, "The linker script has multiple text sections!"
|
||||
text_line_start = text_line_start[0]
|
||||
|
||||
# ensure that parent directory exists before writing
|
||||
fout = Path(fout)
|
||||
fout.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(fout, "w") as f:
|
||||
for lineid, line in enumerate(linker_script_lines):
|
||||
if lineid == text_line_start + 2:
|
||||
@ -42,20 +36,3 @@ def gen_linker_script(
|
||||
f.write(f" .text.{plines}\n")
|
||||
f.write(" )\n")
|
||||
f.write(f"{line}\n")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
parser = argparse.ArgumentParser(
|
||||
description="Generate linker file based on prioritized symbols. Used for link-time optimization.",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--filein",
|
||||
help="Path to prioritized_text.txt input file",
|
||||
default=argparse.SUPPRESS,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--fout", help="Output path for linker ld file", default=argparse.SUPPRESS
|
||||
)
|
||||
# convert args to a dict to pass to gen_linker_script
|
||||
kwargs = vars(parser.parse_args())
|
||||
gen_linker_script(**kwargs)
|
||||
|
Reference in New Issue
Block a user