[AOTI] Add more default options to compile_standalone (#158560)

Summary: When compiling for standalone, make embed_kernel_binary and emit_multi_arch_kernel default to True, and add a default value for model_name_for_generated_files so that the generated C++ project is easier to understand. Also improve the naming of the weights object file to make it more readable.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158560
Approved by: https://github.com/yushangdi
This commit is contained in:
Bin Bao
2025-07-21 09:23:02 -07:00
committed by PyTorch MergeBot
parent 9e0473b566
commit a991e285ae
8 changed files with 171 additions and 62 deletions

View File

@ -1674,12 +1674,6 @@ class AotCodeCompiler:
wrapper_code = "\n".join((wrapper_code, kernel_code))
kernel_code = ""
from .utils import aoti_model_name_from_config
model_class_name = ""
if config.aot_inductor.compile_standalone:
model_class_name = aoti_model_name_from_config()
wrapper_key, wrapper_path = write(
wrapper_code,
"wrapper.cpp",
@ -1712,6 +1706,8 @@ class AotCodeCompiler:
"model.h",
)
) as f:
# model_name_for_generated_files is guaranteed to be non-empty when compile_standalone
model_class_name = config.aot_inductor.model_name_for_generated_files
class_name = f"AOTInductorModel{model_class_name}"
header_code = f.read()
@ -1726,7 +1722,7 @@ class AotCodeCompiler:
header_code,
"h",
specified_dir=specified_output_path,
key=f"{model_class_name}",
key=model_class_name,
)
# Log the AOTInductor wrapper and kernel code, if needed.
@ -1840,7 +1836,7 @@ class AotCodeCompiler:
consts_asm += f"\t.space {len(consts) - 8}\n"
consts_asm += f".globl\t{symbol_prefix}_binary_constants_bin_end\n"
consts_asm += f"{symbol_prefix}_binary_constants_bin_end:\n"
return consts_asm, "S"
return consts_asm, "weights.S"
# Use c++ to convert consts to object file can support more compilers, such as msvc and icx.
def format_consts_to_cpp(
@ -1865,7 +1861,7 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
const_cpp += "\t\n"
const_cpp += "};\t\n"
const_cpp += f"alignas({align_bytes}) extern unsigned char * {symbol_prefix}_binary_constants_bin_end;\t\n"
return const_cpp, "cpp"
return const_cpp, "weights.cpp"
if use_asm_build:
consts_code, code_ext = format_consts_to_asm(
@ -1880,6 +1876,7 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
consts_code,
code_ext,
specified_dir=str(specified_sub_dir),
key=config.aot_inductor.model_name_for_generated_files,
)
consts_s = Path(consts_s)
object_build_options = CppTorchDeviceOptions(
@ -2173,7 +2170,13 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
asm_files = []
if not _IS_WINDOWS:
ld, objcopy = get_ld_and_objcopy(use_relative_path)
kernels = getattr(V.graph.wrapper_code, "_kernel_name_to_body", {})
for kernel_name, value in CudaKernelParamCache.cache.items():
if kernel_name not in kernels:
# It is possible that CudaKernelParamCache contains more Triton kernels
# than what the current graph uses
continue
if asm_file := value["asm"]:
asm_files.append(asm_file)