mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[profiler] record nccl version in distributed info (#121044)
Summary: Add an NCCL version field to the distributed info when the backend is NCCL.
Differential Revision: D54432888
Pull Request resolved: https://github.com/pytorch/pytorch/pull/121044
Approved by: https://github.com/aaronenyeshi
This commit is contained in:
committed by
PyTorch MergeBot
parent
3aa512cd72
commit
eea37c6db4
@ -268,13 +268,18 @@ class _KinetoProfile:
|
||||
if not dist.is_available() or not dist.is_initialized():
|
||||
return None
|
||||
|
||||
return {
|
||||
"backend": dist.get_backend(),
|
||||
backend = dist.get_backend()
|
||||
dist_info = {
|
||||
"backend": backend,
|
||||
"rank": dist.get_rank(),
|
||||
"world_size": dist.get_world_size(),
|
||||
"pg_count": dist.get_pg_count(),
|
||||
"pg_config": dist.distributed_c10d._get_all_pg_configs(),
|
||||
}
|
||||
if backend == "nccl":
|
||||
nccl_version = torch.cuda.nccl.version()
|
||||
dist_info["nccl_version"] = ".".join(str(v) for v in nccl_version)
|
||||
return dist_info
|
||||
|
||||
def _memory_profile(self) -> MemoryProfile:
|
||||
required = ("record_shapes", "profile_memory", "with_stack")
|
||||
|
Reference in New Issue
Block a user