[AOTI] skip ld and objcopy on Windows. (#158545)
Skip `ld` and `objcopy` on Windows, since neither tool is supported there.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/158545
Approved by: https://github.com/desertfire
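For context, `ld` and `objcopy` are GNU binutils tools. In this code path they are obtained via `get_ld_and_objcopy()` and then used by `convert_cubin_to_obj()` to wrap a compiled CUDA `.cubin` into a host object file that can be linked into `model.so`. The helper internals are not part of this diff, so the following is only a rough sketch of that embedding step, assuming a typical objcopy-based approach; the function and file names are placeholders.

import subprocess

def embed_cubin_sketch(cubin_file: str, obj_file: str) -> None:
    # Sketch only: objcopy can repackage an arbitrary binary blob as a
    # linkable ELF object that exports _binary_<name>_start/_end/_size
    # symbols for the wrapper code to reference. PyTorch's actual
    # convert_cubin_to_obj() may differ in flags and symbol handling.
    subprocess.run(
        [
            "objcopy",
            "-I", "binary",        # interpret the input file as raw bytes
            "-O", "elf64-x86-64",  # assumed host object format
            "-B", "i386:x86-64",   # assumed host architecture
            cubin_file,
            obj_file,
        ],
        check=True,
    )

An `ld -r -b binary` invocation achieves much the same thing, which is presumably why both tools are looked up together; neither ships with a stock MSVC toolchain, hence the Windows skip in the diff below.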
@@ -2171,40 +2171,44 @@ ATTRIBUTE_NO_SANITIZE_ADDRESS\t\n"""
         cubins_o = []
         asm_files = []
-        ld, objcopy = get_ld_and_objcopy(use_relative_path)
-        for kernel_name, value in CudaKernelParamCache.cache.items():
-            if asm_file := value["asm"]:
-                asm_files.append(asm_file)
+        if not _IS_WINDOWS:
+            ld, objcopy = get_ld_and_objcopy(use_relative_path)
+            for kernel_name, value in CudaKernelParamCache.cache.items():
+                if asm_file := value["asm"]:
+                    asm_files.append(asm_file)

-            cubin_file = value[get_cpp_wrapper_cubin_path_name()]
-            if config.aot_inductor.emit_multi_arch_kernel and device_type == "cuda":
-                current_arch = _nvcc_arch_as_compile_option()
-                cmd = (
-                    f"{_cuda_compiler()} -fatbin {asm_file} -o {cubin_file} "
-                    # Triton only allows generating PTX version as same as the current arch
-                    f"-gencode arch=compute_{current_arch},code=compute_{current_arch} "
-                    # Include SASS for the current specific arch
-                    f"-gencode arch=compute_{current_arch},code=sm_{current_arch} "
-                )
-                try:
-                    subprocess.run(
-                        cmd.split(),
-                        capture_output=True,
-                        text=True,
-                        check=True,
-                    )
-                except subprocess.CalledProcessError as e:
-                    print(
-                        f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}",
-                        file=sys.stderr,
-                    )
-                    raise
+                cubin_file = value[get_cpp_wrapper_cubin_path_name()]
+                if (
+                    config.aot_inductor.emit_multi_arch_kernel
+                    and device_type == "cuda"
+                ):
+                    current_arch = _nvcc_arch_as_compile_option()
+                    cmd = (
+                        f"{_cuda_compiler()} -fatbin {asm_file} -o {cubin_file} "
+                        # Triton only allows generating PTX version as same as the current arch
+                        f"-gencode arch=compute_{current_arch},code=compute_{current_arch} "
+                        # Include SASS for the current specific arch
+                        f"-gencode arch=compute_{current_arch},code=sm_{current_arch} "
+                    )
+                    try:
+                        subprocess.run(
+                            cmd.split(),
+                            capture_output=True,
+                            text=True,
+                            check=True,
+                        )
+                    except subprocess.CalledProcessError as e:
+                        print(
+                            f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}",
+                            file=sys.stderr,
+                        )
+                        raise

-            if config.aot_inductor.embed_kernel_binary:
-                # Embed cubin files into model.so using objcopy
-                cubins_o.append(
-                    convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy)
-                )
+                if config.aot_inductor.embed_kernel_binary:
+                    # Embed cubin files into model.so using objcopy
+                    cubins_o.append(
+                        convert_cubin_to_obj(cubin_file, kernel_name, ld, objcopy)
+                    )

         output_name, output_dir = get_name_and_dir_from_output_file_path(output_so)
         so_build_options = CppTorchDeviceOptions(
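To make the `emit_multi_arch_kernel` branch concrete, here is roughly what the assembled `nvcc` command and its error handling look like outside of Inductor. The compiler path, arch value, and file names below are placeholders, not values taken from the diff.

import subprocess
import sys

nvcc = "nvcc"                # stand-in for _cuda_compiler()
current_arch = "90"          # stand-in for _nvcc_arch_as_compile_option()
asm_file = "kernel.ptx"      # PTX emitted by Triton
cubin_file = "kernel.fatbin" # fat binary consumed by the AOTI wrapper

cmd = (
    f"{nvcc} -fatbin {asm_file} -o {cubin_file} "
    # PTX for compute_90 keeps the kernel JIT-compatible with newer GPUs ...
    f"-gencode arch=compute_{current_arch},code=compute_{current_arch} "
    # ... while SASS for sm_90 avoids JIT overhead on the current GPU.
    f"-gencode arch=compute_{current_arch},code=sm_{current_arch} "
)

try:
    subprocess.run(cmd.split(), capture_output=True, text=True, check=True)
except subprocess.CalledProcessError as e:
    # Surface nvcc's own diagnostics before re-raising, mirroring the diff.
    print(f"{cmd} failed with:\nstdout:\n{e.stdout}\nstderr:\n{e.stderr}", file=sys.stderr)
    raise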
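The `_IS_WINDOWS` guard itself is just a platform check. A minimal standalone equivalent (an assumption for illustration, not PyTorch's actual definitions, which live in the Inductor sources) would be:

import shutil
import sys

_IS_WINDOWS = sys.platform == "win32"

def binutils_available() -> bool:
    # ld and objcopy come from GNU binutils and are generally absent from a
    # stock Windows toolchain, so the embedding step is skipped there.
    return not _IS_WINDOWS and all(
        shutil.which(tool) is not None for tool in ("ld", "objcopy")
    )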