Simplify nvtx3 CMake handling, always use nvtx3 (#153784)

Fall back to the third-party NVTX3 if a system NVTX3 isn't available. We also reuse the `CUDA::nvtx3` target for better interoperability.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/153784
Approved by: https://github.com/ezyang
This commit is contained in:
cyy
2025-06-23 06:12:42 +00:00
committed by PyTorch MergeBot
parent 31659964a5
commit 099d0d6121
16 changed files with 14 additions and 110 deletions

View File

@ -206,20 +206,6 @@ if sys.platform == "win32":
if os.path.exists(p)
]
if not builtins.any(
os.path.exists(os.path.join(p, "nvToolsExt64_1.dll")) for p in dll_paths
):
nvtoolsext_dll_path = os.path.join(
os.getenv(
"NVTOOLSEXT_PATH",
os.path.join(pfiles_path, "NVIDIA Corporation", "NvToolsExt"),
),
"bin",
"x64",
)
else:
nvtoolsext_dll_path = ""
if cuda_version and builtins.all(
not glob.glob(os.path.join(p, "cudart64*.dll")) for p in dll_paths
):
@ -232,9 +218,7 @@ if sys.platform == "win32":
else:
cuda_path = ""
dll_paths.extend(
p for p in (nvtoolsext_dll_path, cuda_path) if os.path.exists(p)
)
dll_paths.extend(p for p in (cuda_path,) if os.path.exists(p))
kernel32 = ctypes.WinDLL("kernel32.dll", use_last_error=True)
with_load_library_flags = hasattr(kernel32, "AddDllDirectory")
@ -371,7 +355,6 @@ def _load_global_deps() -> None:
"cusparselt": "libcusparseLt.so.*[0-9]",
"cusolver": "libcusolver.so.*[0-9]",
"nccl": "libnccl.so.*[0-9]",
"nvtx": "libnvToolsExt.so.*[0-9]",
"nvshmem": "libnvshmem_host.so.*[0-9]",
}
# cufile is only available on CUDA 12+