Apply clang-format to distributed/c10d folder (#107140)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/107140
Approved by: https://github.com/H-Huang
Shen Li
2023-08-14 06:59:47 -07:00
committed by PyTorch MergeBot
parent 858b465d74
commit dd6319198d
28 changed files with 370 additions and 307 deletions
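The page only shows the resulting diff; it does not record the command that produced it. A pass like this is normally run by invoking clang-format in-place over the folder's sources, roughly as sketched below (the file patterns and the find/xargs pipeline are illustrative assumptions, not taken from the PR):

find torch/csrc/distributed/c10d \( -name '*.cpp' -o -name '*.hpp' \) -print0 \
  | xargs -0 clang-format -i -style=file   # rewrite each file in place using the repo's .clang-format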

torch/csrc/distributed/c10d/reducer.hpp

@@ -1,25 +1,25 @@
 #pragma once
 
+#include <c10/core/ScalarType.h>
 #include <atomic>
 #include <memory>
 #include <mutex>
 #include <tuple>
 #include <unordered_map>
 #include <vector>
 
-#include <c10/core/ScalarType.h>
 #include <ATen/core/ivalue_inl.h>
 #include <c10/macros/Macros.h>
 #include <c10/util/intrusive_ptr.h>
+#include <torch/csrc/autograd/function.h>
+#include <torch/csrc/autograd/profiler.h>
+#include <torch/csrc/autograd/variable.h>
 #include <torch/csrc/distributed/c10d/ProcessGroup.hpp>
 #include <torch/csrc/distributed/c10d/Utils.hpp>
 #include <torch/csrc/distributed/c10d/comm.hpp>
 #include <torch/csrc/distributed/c10d/debug.h>
-#include <torch/csrc/distributed/c10d/reducer_timer.hpp>
 #include <torch/csrc/distributed/c10d/default_comm_hooks.hpp>
-#include <torch/csrc/autograd/function.h>
-#include <torch/csrc/autograd/profiler.h>
-#include <torch/csrc/autograd/variable.h>
+#include <torch/csrc/distributed/c10d/reducer_timer.hpp>
 #ifndef _WIN32
 #include <torch/csrc/distributed/autograd/context/context.h>
 #endif
@@ -101,7 +101,9 @@ class TORCH_API Reducer {
   // Informs reducer that optimizer is running in backward, so gradients
   // don't need to be copied from buckets as the optimizer would've already
   // been applied.
-  void set_optimizer_in_backward() { optim_in_backward_ = true; };
+  void set_optimizer_in_backward() {
+    optim_in_backward_ = true;
+  };
 
   // Runs allreduce or installed communication hook given GradBucket instance.
   c10::intrusive_ptr<c10::ivalue::Future> run_comm_hook(
@@ -109,7 +111,7 @@ class TORCH_API Reducer {
 
   // Runs default allreduce hook.
   c10::intrusive_ptr<c10::ivalue::Future> run_allreduce_hook(
-    GradBucket& grad_bucket);
+      GradBucket& grad_bucket);
 
   // Returns gradient buckets in sequential order of buckets_. This is the order
   // in which buckets are reduced across processes. If return_zero_tensors=true,
@@ -133,8 +135,8 @@ class TORCH_API Reducer {
   void setSparseMetadata(std::map<std::string, at::Tensor>& metadata);
 
   // Install futures that should be awaited at end of backwards. Currently these
-  // are only used by user-defined custom buffer reduction hooks, but can be generalized
-  // to any user-originating futures that need to be awaited.
+  // are only used by user-defined custom buffer reduction hooks, but can be
+  // generalized to any user-originating futures that need to be awaited.
   void install_futures(c10::List<c10::intrusive_ptr<c10::ivalue::Future>> futs);
 
   // Returns true if we should rebuild buckets, else false. We only rebuild
@@ -183,7 +185,8 @@ class TORCH_API Reducer {
   // Removes autograd hooks registered by the Reducer on the model parameters.
   void remove_autograd_hooks();
 
-  // Checks whether or not the reducer has finalized the current backward iteration.
+  // Checks whether or not the reducer has finalized the current backward
+  // iteration.
   void check_finalized();
 
  protected:
@@ -248,9 +251,10 @@ class TORCH_API Reducer {
   // Weak pointer to associated DDP logger.
   std::weak_ptr<c10d::Logger> logger_;
 
-  // List of futures installed by Reducer::install_futures that should be awaited
-  // at the end of backwards pass.
-  c10::optional<c10::List<c10::intrusive_ptr<c10::ivalue::Future>>> installed_futures_{c10::nullopt};
+  // List of futures installed by Reducer::install_futures that should be
+  // awaited at the end of backwards pass.
+  c10::optional<c10::List<c10::intrusive_ptr<c10::ivalue::Future>>>
+      installed_futures_{c10::nullopt};
 
   // Mixed precision parameter dtype for bucket type checking.
   c10::optional<c10::ScalarType> mixed_precision_param_dtype_{c10::nullopt};
@@ -273,7 +277,8 @@ class TORCH_API Reducer {
   // bucket_index is a key to cache after buckets are rebuilt, after which this
   // mapping never changes.
   std::vector<at::Tensor> get_variables_for_bucket(
-      size_t bucket_index, const Bucket& bucket) const;
+      size_t bucket_index,
+      const Bucket& bucket) const;
 
   // Asserts that the reduction for the previous iteration has finished before
   // rebuilding buckets or kicking off the next one.
@@ -385,7 +390,6 @@ class TORCH_API Reducer {
     // done on different CUDA streams. We record an event for every copy
     // so that we can synchronize with them prior to kicking off the reduction.
     // std::vector<at::cuda::CUDAEvent> events;
-
   };
 
   std::vector<Bucket> buckets_;
@@ -401,7 +405,9 @@ class TORCH_API Reducer {
 
     VariableLocator() = default;
 
-    VariableLocator(size_t bucket_index_, size_t intra_bucket_index_) : bucket_index(bucket_index_), intra_bucket_index(intra_bucket_index_) {}
+    VariableLocator(size_t bucket_index_, size_t intra_bucket_index_)
+        : bucket_index(bucket_index_),
+          intra_bucket_index(intra_bucket_index_) {}
   };
 
   // Map the index of a variable to its location in the bucket structure.
@@ -409,10 +415,12 @@ class TORCH_API Reducer {
 
   // track the number of iterations to synchronize grads in training so far.
   long num_iterations_;
-  // track distinct iteration of backward call. This is distinct from num_iterations_,
-  // for example in the case of multiple forward before backward.
+  // track distinct iteration of backward call. This is distinct from
+  // num_iterations_, for example in the case of multiple forward before
+  // backward.
   long num_bwd_calls_;
-  // whether the first autograd hook for a distinct backward pass has been called.
+  // whether the first autograd hook for a distinct backward pass has been
+  // called.
   bool first_autograd_hook_called_;
   // track the number of buckets that have been ready for
   // communication calls like allReduce or communication hooks.
@@ -543,7 +551,8 @@ class TORCH_API Reducer {
 
   // Cached bucket index to model parameter mapping. Populated after buckets
   // are rebuilt after which this mapping is static.
-  mutable std::unordered_map<size_t, std::vector<at::Tensor>> cached_variables_for_bucket_;
+  mutable std::unordered_map<size_t, std::vector<at::Tensor>>
+      cached_variables_for_bucket_;
 
   bool optim_in_backward_{false};
   friend class Logger;