From fa341284356278bed830fc41435cea2903d4cbba Mon Sep 17 00:00:00 2001
From: Dingming Wu
Date: Fri, 7 Feb 2025 22:43:52 +0000
Subject: [PATCH] revert PTD's change that leads to signature mismatch of printNcclCommProxyTrace (#146453)

Summary:
D68801098 introduced this function signature mismatch issue for printNcclCommProxyTrace. Revert it so that trunk build can pass.

Test Plan:
With the change, build of APS model using rcclexp can now pass:
`sh scripts/ltian/run_jobs/fb_fm_v2/run_fb_fm_v2_job.sh -h T20_GTT_MI300X -n 16 -b 1024 -t [2024-12-06] -d ai_infra_ngs -e ai_infra_training_rnd_tc -x 0`

Reviewed By: c-p-i-o

Differential Revision: D69149588

Pull Request resolved: https://github.com/pytorch/pytorch/pull/146453
Approved by: https://github.com/c-p-i-o
---
 torch/csrc/distributed/c10d/NCCLUtils.cpp | 2 +-
 torch/csrc/distributed/c10d/NCCLUtils.hpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/torch/csrc/distributed/c10d/NCCLUtils.cpp b/torch/csrc/distributed/c10d/NCCLUtils.cpp
index 2c50afd0d68c..9b5c59624795 100644
--- a/torch/csrc/distributed/c10d/NCCLUtils.cpp
+++ b/torch/csrc/distributed/c10d/NCCLUtils.cpp
@@ -549,7 +549,7 @@ std::string getNcclErrorDetailStr(
 
 // Dump proxyTrace log to stdout
 void printNcclCommProxyTrace(
-    std::string& dumpReason,
+    const std::string& dumpReason,
     const std::unordered_map<std::string, std::string>& dumpMap) {
   LOG(INFO) << "Dumping nccl comm trace, reason: " << dumpReason;
   for (auto& [key, value] : dumpMap) {
diff --git a/torch/csrc/distributed/c10d/NCCLUtils.hpp b/torch/csrc/distributed/c10d/NCCLUtils.hpp
index 9b30fa5e03c8..1ec814948562 100644
--- a/torch/csrc/distributed/c10d/NCCLUtils.hpp
+++ b/torch/csrc/distributed/c10d/NCCLUtils.hpp
@@ -349,7 +349,7 @@ struct ncclRedOpRAII {
 };
 
 void printNcclCommProxyTrace(
-    std::string dumpReason,
+    const std::string& dumpReason,
     const std::unordered_map<std::string, std::string>& dumpMap);
 
 } // namespace c10d