mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[AOTI] Add more default options to compile_standalone (#158560)
Summary: When compiling for standalone, make embed_kernel_binary and emit_multi_arch_kernel default to True, and add a default name for model_name_for_generated_files to make the generated C++ project easier to understand. Also improve the weights object file naming to make it more readable.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158560
Approved by: https://github.com/yushangdi
This commit is contained in:
committed by
PyTorch MergeBot
parent
9e0473b566
commit
a991e285ae
@ -1674,12 +1674,6 @@ class AotCodeCompiler:
|
||||
wrapper_code = "\n".join((wrapper_code, kernel_code))
|
||||
kernel_code = ""
|
||||
|
||||
from .utils import aoti_model_name_from_config
|
||||
|
||||
model_class_name = ""
|
||||
if config.aot_inductor.compile_standalone:
|
||||
model_class_name = aoti_model_name_from_config()
|
||||
|
||||
wrapper_key, wrapper_path = write(
|
||||
wrapper_code,
|
||||
"wrapper.cpp",
|
||||
@ -1712,6 +1706,8 @@ class AotCodeCompiler:
|
||||
"model.h",
|
||||
)
|
||||
) as f:
|
||||
# model_name_for_generated_files is guaranteed to be non-empty when compile_standalone
|
||||
model_class_name = config.aot_inductor.model_name_for_generated_files
|
||||
class_name = f"AOTInductorModel{model_class_name}"
|
||||
header_code = f.read()
|
||||
|
||||
@ -1726,7 +1722,7 @@ class AotCodeCompiler:
|
||||
header_code,
|
||||
"h",
|
||||
specified_dir=specified_output_path,
|
||||
key=f"{model_class_name}",
|
||||
key=model_class_name,
|
||||
)
|
||||
|
||||
# Log the AOTInductor wrapper and kernel code, if needed.
|
||||
@ -1840,7 +1836,7 @@ class AotCodeCompiler:
|
||||
consts_asm += f"\t.space {len(consts) - 8}\n"
|
||||
consts_asm += f".globl\t{symbol_prefix}_binary_constants_bin_end\n"
|
||||
consts_asm += f"{symbol_prefix}_binary_constants_bin_end:\n"
|
||||
return consts_asm, "S"
|
||||
return consts_asm, "weights.S"
|
||||
|
||||
# Use c++ to convert consts to object file can support more compilers, such as msvc and icx.
|
||||
def format_consts_to_cpp(
|
||||
@ -1865,7 +1861,7 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
|
||||
const_cpp += "\t\n"
|
||||
const_cpp += "};\t\n"
|
||||
const_cpp += f"alignas({align_bytes}) extern unsigned char * {symbol_prefix}_binary_constants_bin_end;\t\n"
|
||||
return const_cpp, "cpp"
|
||||
return const_cpp, "weights.cpp"
|
||||
|
||||
if use_asm_build:
|
||||
consts_code, code_ext = format_consts_to_asm(
|
||||
@ -1880,6 +1876,7 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
|
||||
consts_code,
|
||||
code_ext,
|
||||
specified_dir=str(specified_sub_dir),
|
||||
key=config.aot_inductor.model_name_for_generated_files,
|
||||
)
|
||||
consts_s = Path(consts_s)
|
||||
object_build_options = CppTorchDeviceOptions(
|
||||
@ -2173,7 +2170,13 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
|
||||
asm_files = []
|
||||
if not _IS_WINDOWS:
|
||||
ld, objcopy = get_ld_and_objcopy(use_relative_path)
|
||||
kernels = getattr(V.graph.wrapper_code, "_kernel_name_to_body", {})
|
||||
for kernel_name, value in CudaKernelParamCache.cache.items():
|
||||
if kernel_name not in kernels:
|
||||
# It is possible that CudaKernelParamCache contains more Triton kernels
|
||||
# than what the current graph uses
|
||||
continue
|
||||
|
||||
if asm_file := value["asm"]:
|
||||
asm_files.append(asm_file)
|
||||
|
||||
|
Reference in New Issue
Block a user