From 54801e6fd6d8c94e6e18433fe45d4cc36411ab9c Mon Sep 17 00:00:00 2001
From: PyTorch MergeBot <pytorchmergebot@users.noreply.github.com>
Date: Thu, 4 Apr 2024 13:22:22 +0000
Subject: [PATCH] Revert "[Distributed] [2/N] Fix clang-tidy warnings in
 torch/csrc/distributed/c10d (#122892)"

This reverts commit 0ba16ffd35af3eb56da4892cc5387c5e8ac864bb.

Reverted https://github.com/pytorch/pytorch/pull/122892 on behalf of https://github.com/atalman due to broke cuda tests ([comment](https://github.com/pytorch/pytorch/pull/122892#issuecomment-2037207036))
---
 torch/csrc/distributed/c10d/Functional.cpp | 56 ++++++++++------------
 torch/csrc/distributed/c10d/reducer.cpp    |  7 ++-
 torch/csrc/distributed/c10d/reducer.hpp    |  8 ++--
 3 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/torch/csrc/distributed/c10d/Functional.cpp b/torch/csrc/distributed/c10d/Functional.cpp
index 9801e8c15904..b0ff3dc0c403 100644
--- a/torch/csrc/distributed/c10d/Functional.cpp
+++ b/torch/csrc/distributed/c10d/Functional.cpp
@@ -1,3 +1,5 @@
+#include <shared_mutex>
+
 #include <ATen/ATen.h>
 #include <ATen/core/op_registration/op_registration.h>
 #include <torch/csrc/autograd/function.h>
@@ -5,7 +7,6 @@
 #include <torch/csrc/distributed/c10d/GroupRegistry.hpp>
 #include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
 #include <torch/csrc/distributed/c10d/RankLocal.hpp>
-#include <utility>
 
 namespace {
 
@@ -13,7 +14,7 @@ class WorkRegistry {
  public:
   void register_work(
       const at::Tensor& tensor,
-      const c10::intrusive_ptr<c10d::Work>& work) {
+      c10::intrusive_ptr<c10d::Work> work) {
     const auto storage = tensor.storage().getWeakStorageImpl();
     std::unique_lock lock(lock_);
     auto [it, inserted] = registry_.emplace(storage, work);
@@ -49,8 +50,8 @@ class WorkRegistry {
         "is invoked on all tensors returned from c10d_functional collective "
         "ops before they are used.");
     }
-    for (auto& it : registry_) {
-      it.second.release();
+    for (auto it = registry_.begin(); it != registry_.end(); ++it) {
+      it->second.release();
     }
   }
 
@@ -66,7 +67,7 @@ static WorkRegistry process_registry;
 
 void register_work(
     const at::Tensor& tensor,
-    const c10::intrusive_ptr<c10d::Work>& work) {
+    c10::intrusive_ptr<c10d::Work> work) {
   if (c10d::get_thread_isolation_mode()) {
     c10d::RankLocal<WorkRegistry>::get().register_work(tensor, work);
   } else {
@@ -104,8 +105,8 @@ c10d::ReduceOp to_reduce_op(const std::string& reduce_op) {
 
 at::Tensor& all_reduce_(
     at::Tensor& input,
-    const std::string& reduce_op,
-    const std::string& group_name) {
+    std::string reduce_op,
+    std::string group_name) {
   c10d::AllreduceOptions opts;
   opts.reduceOp = to_reduce_op(reduce_op);
 
@@ -118,16 +119,16 @@ at::Tensor& all_reduce_(
 
 at::Tensor all_reduce(
     const at::Tensor& input,
-    const std::string& reduce_op,
-    const std::string& group_name) {
+    std::string reduce_op,
+    std::string group_name) {
   auto output = input.clone(at::MemoryFormat::Contiguous);
   return all_reduce_(output, reduce_op, group_name);
 }
 
 std::vector<at::Tensor> all_reduce_coalesced_(
     std::vector<at::Tensor> inputs,
-    const std::string& reduce_op,
-    const std::string& group_name) {
+    std::string reduce_op,
+    std::string group_name) {
   c10d::AllreduceCoalescedOptions opts;
   opts.reduceOp = to_reduce_op(reduce_op);
 
@@ -140,9 +141,9 @@ std::vector<at::Tensor> all_reduce_coalesced_(
 }
 
 std::vector<at::Tensor> all_reduce_coalesced(
-    const std::vector<at::Tensor>& inputs,
-    const std::string& reduce_op,
-    const std::string& group_name) {
+    std::vector<at::Tensor> inputs,
+    std::string reduce_op,
+    std::string group_name) {
   std::vector<at::Tensor> outputs;
   outputs.reserve(inputs.size());
   for (const auto& tensor : inputs) {
@@ -164,9 +165,8 @@ at::Tensor allocate_all_gather_output(
 std::vector<at::Tensor> all_gather_into_tensor_coalesced(
     std::vector<at::Tensor> inputs,
     int64_t group_size,
-    const std::string& group_name) {
+    std::string group_name) {
   std::vector<at::Tensor> outputs;
-  outputs.reserve(inputs.size());
   for (const auto& tensor : inputs) {
     outputs.push_back(allocate_all_gather_output(tensor, group_size));
   }
@@ -183,7 +183,7 @@ std::vector<at::Tensor> all_gather_into_tensor_coalesced(
 at::Tensor all_gather_into_tensor(
     const at::Tensor& input,
     int64_t group_size,
-    const std::string& group_name) {
+    std::string group_name) {
   std::vector<at::Tensor> inputs{input};
   return all_gather_into_tensor_coalesced(inputs, group_size, group_name)[0];
 }
@@ -205,13 +205,12 @@ at::Tensor allocate_reduce_scatter_output(
 
 std::vector<at::Tensor> reduce_scatter_tensor_coalesced(
     std::vector<at::Tensor> inputs,
-    const std::string& reduce_op,
+    std::string reduce_op,
     int64_t group_size,
-    const std::string& group_name) {
+    std::string group_name) {
   c10d::ReduceScatterOptions opts;
   opts.reduceOp = to_reduce_op(reduce_op);
   std::vector<at::Tensor> outputs;
-  outputs.reserve(inputs.size());
   for (const auto& tensor : inputs) {
     outputs.push_back(allocate_reduce_scatter_output(tensor, group_size));
   }
@@ -227,9 +226,9 @@ std::vector<at::Tensor> reduce_scatter_tensor_coalesced(
 
 at::Tensor reduce_scatter_tensor(
     const at::Tensor& input,
-    const std::string& reduce_op,
+    std::string reduce_op,
     int64_t group_size,
-    const std::string& group_name) {
+    std::string group_name) {
   std::vector<at::Tensor> inputs{input};
   return reduce_scatter_tensor_coalesced(
       inputs, reduce_op, group_size, group_name)[0];
@@ -239,10 +238,10 @@ at::Tensor all_to_all_single(
     const at::Tensor& input,
     std::vector<int64_t> output_split_sizes,
     std::vector<int64_t> input_split_sizes,
-    const std::string& group_name) {
+    std::string group_name) {
   std::vector<int64_t> output_sizes = input.sizes().vec();
-  output_sizes[0] = std::accumulate(
-      output_split_sizes.begin(), output_split_sizes.end(), int64_t(0));
+  output_sizes[0] =
+      std::accumulate(output_split_sizes.begin(), output_split_sizes.end(), 0);
   auto output = input.new_empty(output_sizes);
 
   auto group = c10d::resolve_process_group(group_name);
@@ -255,10 +254,7 @@ at::Tensor all_to_all_single(
   return output;
 }
 
-at::Tensor& broadcast_(
-    at::Tensor& input,
-    int64_t src,
-    const std::string& group_name) {
+at::Tensor& broadcast_(at::Tensor& input, int64_t src, std::string group_name) {
   c10d::BroadcastOptions opts;
   opts.rootRank = src;
   std::vector<at::Tensor> inputs{input};
@@ -272,7 +268,7 @@ at::Tensor& broadcast_(
 at::Tensor broadcast(
     const at::Tensor& input,
     int64_t src,
-    const std::string& group_name) {
+    std::string group_name) {
   auto output = input.clone(at::MemoryFormat::Contiguous);
   return broadcast_(output, src, group_name);
 }
diff --git a/torch/csrc/distributed/c10d/reducer.cpp b/torch/csrc/distributed/c10d/reducer.cpp
index dd90fadb1148..b2983f06d446 100644
--- a/torch/csrc/distributed/c10d/reducer.cpp
+++ b/torch/csrc/distributed/c10d/reducer.cpp
@@ -20,7 +20,6 @@
 #include <torch/csrc/autograd/utils/lambda_post_hook.h>
 #include <torch/csrc/distributed/c10d/comm.hpp>
 #include <torch/csrc/distributed/c10d/logger.hpp>
-#include <utility>
 
 namespace c10d {
 namespace {
@@ -90,7 +89,7 @@ std::vector<at::Tensor> extractTensors(const c10::IValue& result) {
 Reducer::Reducer(
     std::vector<at::Tensor> params,
     std::vector<std::vector<size_t>> bucket_indices,
-    const std::vector<size_t>& per_bucket_size_limits,
+    std::vector<size_t> per_bucket_size_limits,
     c10::intrusive_ptr<::c10d::ProcessGroup> process_group,
     std::vector<bool> expect_sparse_gradients,
     int64_t bucket_bytes_cap,
@@ -450,7 +449,7 @@ void Reducer::mark_variable_ready_sparse(size_t variable_index) {
   if (sparse_metadata_) {
     grad = grad.coalesce();
     REDUCER_CHECK(
-        !param_names_.empty(), logger_, "No parameter names were found");
+        param_names_.size() != 0, logger_, "No parameter names were found");
     std::string& param_name = param_names_[variable_index];
     auto iter = sparse_metadata_->find(param_name);
     REDUCER_CHECK(
@@ -633,7 +632,7 @@ void Reducer::delay_all_reduce() {
 }
 
 void Reducer::set_logger(std::weak_ptr<Logger> logger) {
-  logger_ = std::move(logger);
+  logger_ = logger;
 }
 
 // The function `autograd_hook` is called after the gradient for a
diff --git a/torch/csrc/distributed/c10d/reducer.hpp b/torch/csrc/distributed/c10d/reducer.hpp
index 863aeecb4e75..43782204be05 100644
--- a/torch/csrc/distributed/c10d/reducer.hpp
+++ b/torch/csrc/distributed/c10d/reducer.hpp
@@ -51,7 +51,7 @@ class TORCH_API Reducer {
   explicit Reducer(
       std::vector<at::Tensor> params,
      std::vector<std::vector<size_t>> bucket_indices,
-      const std::vector<size_t>& per_bucket_size_limits,
+      std::vector<size_t> per_bucket_size_limits,
       c10::intrusive_ptr<::c10d::ProcessGroup> process_group,
       std::vector<bool> expect_sparse_gradients,
       int64_t bucket_bytes_cap,
@@ -303,9 +303,11 @@ class TORCH_API Reducer {
 
   using GradCallback = std::function<bool(at::Tensor&)>;
 #ifndef _WIN32
   static_assert(
-      std::is_same_v<
+      std::is_same<
           GradCallback,
-          torch::distributed::autograd::DistAutogradContext::GradCallback>);
+          torch::distributed::autograd::DistAutogradContext::GradCallback>::
+          value,
+      "");
 #endif
   void runGradCallbackForVariable(at::Tensor& variable, GradCallback&& cb);