[AOTI] skip ld and objcopy on Windows. (#158545)

Skip `ld` and `objcopy` on Windows. They are not supported on Windows.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/158545
Approved by: https://github.com/desertfire
This commit is contained in:
Xu Han
2025-07-17 15:43:20 +00:00
committed by PyTorch MergeBot
parent 2ecf083b72
commit 16b21fa8b2

View File

@ -2171,40 +2171,44 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
cubins_o = []
asm_files = []
ld, objcopy = get_ld_and_objcopy(use_relative_path)
for kernel_name, value in CudaKernelParamCache.cache.items():
if asm_file := value["asm"]:
asm_files.append(asm_file)
if not _IS_WINDOWS:
ld, objcopy = get_ld_and_objcopy(use_relative_path)
for kernel_name, value in CudaKernelParamCache.cache.items():
if asm_file := value["asm"]:
asm_files.append(asm_file)
cubin_file = value[get_cpp_wrapper_cubin_path_name()]
if config.aot_inductor.emit_multi_arch_kernel and device_type == "cuda":
current_arch = _nvcc_arch_as_compile_option()
cmd = (
f"{_cuda_compiler()} -fatbin {asm_file} -o {cubin_file} "
# Triton only allows generating PTX version as same as the current arch
f"-gencode arch=compute_{current_arch},code=compute_{current_arch} "
# Include SASS for the current specific arch
f"-gencode arch=compute_{current_arch},code=sm_{current_arch} "
)
try:
subprocess.run(
cmd.split(),
capture_output=True,
text=True,
check=True,
cubin_file = value[get_cpp_wrapper_cubin_path_name()]
if (
config.aot_inductor.emit_multi_arch_kernel
and device_type == "cuda"
):
current_arch = _nvcc_arch_as_compile_option()
cmd = (
f"{_cuda_compiler()} -fatbin {asm_file} -o {cubin_file} "
# Triton only allows generating PTX version as same as the current arch
f"-gencode arch=compute_{current_arch},code=compute_{current_arch} "
# Include SASS for the current specific arch
f"-gencode arch=compute_{current_arch},code=sm_{current_arch} "
)
except subprocess.CalledProcessError as e:
print(
f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}",
file=sys.stderr,
)
raise
try:
subprocess.run(
cmd.split(),
capture_output=True,
text=True,
check=True,
)
except subprocess.CalledProcessError as e:
print(
f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}",
file=sys.stderr,
)
raise
if config.aot_inductor.embed_kernel_binary:
# Embed cubin files into model.so using objcopy
cubins_o.append(
convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy)
)
if config.aot_inductor.embed_kernel_binary:
# Embed cubin files into model.so using objcopy
cubins_o.append(
convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy)
)
output_name, output_dir = get_name_and_dir_from_output_file_path(output_so)
so_build_options = CppTorchDeviceOptions(