Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[Lint] Update clang-format to 19.1.4 (#153889)

All changes other than the one to `tools/linter/adapters/s3_init_config.json` are generated by newer clang-format.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/153889
Approved by: https://github.com/cyyever, https://github.com/atalman

Committed by: PyTorch MergeBot
Parent: d869ea11e0
Commit: c4d1ff02f8
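The only file edited by hand is tools/linter/adapters/s3_init_config.json, which pins the clang-format binary per platform by download URL and SHA-256 hash (see the hunk for that file further down). As a rough illustration of what those hash fields are for (this is a hedged sketch, not the repository's actual s3_init logic, and the local file path below is hypothetical), a downloaded binary could be checked like this:

import hashlib

# Pinned SHA-256 for the Linux clang-format 19.1.4 binary, copied from the
# s3_init_config.json hunk in this commit.
EXPECTED_SHA256 = "bfa9ef6eccb372f79ffcb6196af966fd84519ea9567f5ae7b6ad30208cd82109"

def sha256_of(path):
    # Hash the file in 1 MiB chunks so large binaries do not need to fit in memory.
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

actual = sha256_of("clang-format")  # hypothetical path to the downloaded binary
if actual != EXPECTED_SHA256:
    raise SystemExit("clang-format binary does not match the pinned hash: " + actual)
print("clang-format binary matches the pinned hash")

The hunks below are the mechanical reformatting produced by clang-format 19.1.4.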
@@ -395,8 +395,7 @@ class Vectorized<double> {
})} // Comparison using the _CMP_**_OQ predicate.
// `O`: get false if an operand is NaN
// `Q`: do not raise if an operand is NaN
-Vectorized<double>
-operator==(const Vectorized<double>& other) const {
+Vectorized<double> operator==(const Vectorized<double>& other) const {
svbool_t mask = svcmpeq_f64(ptrue, values, other);
return svsel_f64(mask, ALL_F64_TRUE_MASK, ALL_F64_FALSE_MASK);
}
@@ -497,8 +497,7 @@ class Vectorized<float> {
})} // Comparison using the _CMP_**_OQ predicate.
// `O`: get false if an operand is NaN
// `Q`: do not raise if an operand is NaN
-Vectorized<float>
-operator==(const Vectorized<float>& other) const {
+Vectorized<float> operator==(const Vectorized<float>& other) const {
svbool_t mask = svcmpeq_f32(ptrue, values, other);
return svsel_f32(mask, ALL_F32_TRUE_MASK, ALL_F32_FALSE_MASK);
}
@@ -97,14 +97,14 @@ class Vectorized<float> {
const Vectorized<float>& a,
const Vectorized<float>& b) {
Vectorized<float> vec;
-vec.values =
-BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 0,
+(mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 1,
+(mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 2,
+(mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 3,
+(mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);
return vec;
}
static Vectorized<float> blendv(
@@ -13,8 +13,7 @@ inline namespace CPU_CAPABILITY {
template <
typename VecT,
typename ValueT,
-template <int, bool>
-typename BlendRegs,
+template <int, bool> typename BlendRegs,
typename Derived>
struct Vectorized16 {
protected:
@@ -54,23 +53,23 @@ struct Vectorized16 {
template <int64_t mask>
static Derived blend(const Derived& a, const Derived& b) {
Derived vec;
-vec.values =
-BlendRegs<0, (mask & 0x01) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<1, (mask & 0x02) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<2, (mask & 0x04) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<3, (mask & 0x08) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 0,
+(mask & 0x01) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 1,
+(mask & 0x02) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 2,
+(mask & 0x04) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 3,
+(mask & 0x08) != 0 > ::impl(a.values, b.values, vec.values);

-vec.values =
-BlendRegs<4, (mask & 0x10) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<5, (mask & 0x20) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<6, (mask & 0x40) != 0>::impl(a.values, b.values, vec.values);
-vec.values =
-BlendRegs<7, (mask & 0x80) != 0>::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 4,
+(mask & 0x10) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 5,
+(mask & 0x20) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 6,
+(mask & 0x40) != 0 > ::impl(a.values, b.values, vec.values);
+vec.values = BlendRegs < 7,
+(mask & 0x80) != 0 > ::impl(a.values, b.values, vec.values);

return vec;
}
@@ -780,8 +780,8 @@ void MPSProfiler::handleIntSignal(int signal) {
}

// used to capture sigint signal to log profiling stats
-struct sigaction MPSProfiler::currentSigint {};
-struct sigaction MPSProfiler::previousSigint {};
+struct sigaction MPSProfiler::currentSigint{};
+struct sigaction MPSProfiler::previousSigint{};

bool MPSProfiler::isCapturing() const {
return [captureManager isCapturing];
@@ -216,7 +216,7 @@ struct RNNDescriptorParams {
cudnnDataType_t datatype,
cudnnDataType_t input_datatype) {
#endif
-this->set_mode(mode);
+this -> set_mode(mode);
#ifdef USE_CUDNN_RNN_V8_API
this->input_size = input_size;
this->packed = packed;
@@ -153,12 +153,12 @@ kernel void searchsorted(
constant INPUT_T * data_in [[buffer(0)]], \
constant INPUT_T * data_bd [[buffer(1)]], \
device OUTPUT_T * data_out [[buffer(2)]], \
-constant int64_t & idim_in [[buffer(3)]], \
-constant int64_t & idim_bd [[buffer(4)]], \
-constant int64_t & numel_in [[buffer(5)]], \
-constant int64_t & right [[buffer(6)]], \
-constant int64_t & is_1d_boundaries [[buffer(7)]], \
-constant int64_t * data_sort [[buffer(8)]], \
+constant int64_t& idim_in [[buffer(3)]], \
+constant int64_t& idim_bd [[buffer(4)]], \
+constant int64_t& numel_in [[buffer(5)]], \
+constant int64_t& right [[buffer(6)]], \
+constant int64_t& is_1d_boundaries [[buffer(7)]], \
+constant int64_t* data_sort [[buffer(8)]], \
uint2 tgid [[threadgroup_position_in_grid]], \
uint2 tid2 [[thread_position_in_threadgroup]], \
uint2 tptg [[threads_per_threadgroup]]); \
@@ -167,11 +167,11 @@ kernel void searchsorted(
constant INPUT_T * data_in [[buffer(0)]], \
constant INPUT_T * data_bd [[buffer(1)]], \
device OUTPUT_T * data_out [[buffer(2)]], \
-constant int64_t & idim_in [[buffer(3)]], \
-constant int64_t & idim_bd [[buffer(4)]], \
-constant int64_t & numel_in [[buffer(5)]], \
-constant int64_t & right [[buffer(6)]], \
-constant int64_t & is_1d_boundaries [[buffer(7)]], \
+constant int64_t& idim_in [[buffer(3)]], \
+constant int64_t& idim_bd [[buffer(4)]], \
+constant int64_t& numel_in [[buffer(5)]], \
+constant int64_t& right [[buffer(6)]], \
+constant int64_t& is_1d_boundaries [[buffer(7)]], \
uint2 tgid [[threadgroup_position_in_grid]], \
uint2 tid2 [[thread_position_in_threadgroup]], \
uint2 tptg [[threads_per_threadgroup]]);
@@ -94,21 +94,21 @@ kernel void histogramdd(
}
}

#define REGISTER_HISTOGRAMDD_OP(DTYPE) \
template [[host_name("histogramdd_" #DTYPE)]] kernel void \
histogramdd<DTYPE>( \
constant DTYPE * input_ [[buffer(0)]], \
constant DTYPE * weight [[buffer(1)]], \
device DTYPE * local_out [[buffer(2)]], \
constant uint * offsets [[buffer(3)]], \
-constant size_t & num_dims [[buffer(4)]], \
-constant DTYPE * bin_seq [[buffer(5)]], \
-constant int64_t * num_bin_edges [[buffer(6)]], \
-constant DTYPE * leftmost_edge [[buffer(7)]], \
-constant DTYPE * rightmost_edge [[buffer(8)]], \
-constant int64_t * local_out_strides [[buffer(9)]], \
-constant uint8_t & bin_selection_algorithm [[buffer(10)]], \
-constant uint8_t & has_weight [[buffer(11)]], \
+constant size_t& num_dims [[buffer(4)]], \
+constant DTYPE* bin_seq [[buffer(5)]], \
+constant int64_t* num_bin_edges [[buffer(6)]], \
+constant DTYPE* leftmost_edge [[buffer(7)]], \
+constant DTYPE* rightmost_edge [[buffer(8)]], \
+constant int64_t* local_out_strides [[buffer(9)]], \
+constant uint8_t& bin_selection_algorithm [[buffer(10)]], \
+constant uint8_t& has_weight [[buffer(11)]], \
uint tid [[thread_position_in_grid]]);

REGISTER_HISTOGRAMDD_OP(float);
@@ -209,11 +209,11 @@ kernel void triu_indices(
template [[host_name(#NAME "_indices_" #DTYPE)]] kernel void \
NAME##_indices<DTYPE>( \
device DTYPE * tensor, \
-constant int64_t & col_offset, \
-constant int64_t & m_first_row, \
-constant int64_t & col, \
-constant int64_t & rectangle_size, \
-constant int64_t & triu_size, \
+constant int64_t& col_offset, \
+constant int64_t& m_first_row, \
+constant int64_t& col, \
+constant int64_t& rectangle_size, \
+constant int64_t& triu_size, \
uint linear_index [[thread_position_in_grid]])

INSTANTIATE_TRI_INDICES(triu, long);
@@ -88,15 +88,13 @@ Tensor unsqueeze(const at::Tensor& self, int64_t dim) {
}

// Create the params buffer
-struct Block block {
-{
+struct Block block{{
// Dimension to unsqueeze
static_cast<int32_t>(dim),
// Keep track of the channel in Image3D
static_cast<int32_t>(
std::ceil(static_cast<float>(output_size[channel_index]) / 4)),
-}
-};
+}};

api::UniformParamsBuffer params(context, block);

@@ -193,11 +193,11 @@ struct C10_API AutogradMetaFactory {
C10_API void SetAutogradMetaFactory(AutogradMetaFactory* factory);
C10_API AutogradMetaFactory* GetAutogradMetaFactory();

-struct C10_API AutogradMetaFactoryRegisterer {
-explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory* factory) {
+struct C10_API AutogradMetaFactoryRegisterer{
+explicit AutogradMetaFactoryRegisterer(AutogradMetaFactory * factory){
SetAutogradMetaFactory(factory);
-}
-};
+} // namespace impl
+}; // namespace c10

} // namespace impl
@@ -286,7 +286,7 @@ constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256;
#define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \
((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \
? (blocks_per_sm) \
-: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \
+: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block) - 1) / \
(threads_per_block))))
// C10_LAUNCH_BOUNDS is analogous to __launch_bounds__
#define C10_LAUNCH_BOUNDS_0 \
@@ -68,8 +68,7 @@ static_assert(
} // namespace test_function_traits

struct MovableOnly {
-constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */
-}
+constexpr MovableOnly(int val_) : val(val_) { /* no default constructor */ }
MovableOnly(const MovableOnly&) = delete;
MovableOnly(MovableOnly&&) = default;
MovableOnly& operator=(const MovableOnly&) = delete;
@@ -5,20 +5,20 @@

#if !defined(_WIN32)
static bool file_exists(const char* path) {
-struct stat st {};
+struct stat st{};
return stat(path, &st) == 0 && S_ISREG(st.st_mode);
}
static bool directory_exists(const char* path) {
-struct stat st {};
+struct stat st{};
return stat(path, &st) == 0 && S_ISDIR(st.st_mode);
}
#else
static bool file_exists(const char* path) {
-struct _stat st {};
+struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFREG);
}
static bool directory_exists(const char* path) {
-struct _stat st {};
+struct _stat st{};
return _stat(path, &st) == 0 && ((st.st_mode & _S_IFMT) == _S_IFDIR);
}
#endif // !defined(_WIN32)
@@ -59,7 +59,7 @@ inline time_t getTime(bool allow_monotonic = false) {
.count();
#else
// clock_gettime is *much* faster than std::chrono implementation on Linux
-struct timespec t {};
+struct timespec t{};
auto mode = CLOCK_REALTIME;
if (allow_monotonic) {
mode = CLOCK_MONOTONIC;
@@ -116,8 +116,8 @@ class C10_API Error : public std::exception {

class C10_API Warning {
public:
-class C10_API UserWarning {};
-class C10_API DeprecationWarning {};
+class C10_API UserWarning{};
+class C10_API DeprecationWarning{};

using warning_variant_t = std::variant<UserWarning, DeprecationWarning>;

@@ -70,7 +70,9 @@ class LeftRight final {

~LeftRight() {
// wait until any potentially running writers are finished
-{ std::unique_lock<std::mutex> lock(_writeMutex); }
+{
+std::unique_lock<std::mutex> lock(_writeMutex);
+}

// wait until any potentially running readers are finished
while (_counters[0].load() != 0 || _counters[1].load() != 0) {
@@ -370,9 +370,9 @@ class SmallVectorTemplateCommon
/// note
template <
typename T,
-bool = (std::is_trivially_copy_constructible_v<T>)&&(
-std::is_trivially_move_constructible_v<
-T>)&&std::is_trivially_destructible_v<T>>
+bool = (std::is_trivially_copy_constructible_v<T>) &&
+(std::is_trivially_move_constructible_v<T>) &&
+std::is_trivially_destructible_v<T>>
class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

@@ -59,7 +59,7 @@ void hookupHandler() {
if (hookedUpCount++) {
return;
}
-struct sigaction sa {};
+struct sigaction sa{};
// Setup the handler
sa.sa_handler = &handleSignal;
// Restart the system call, if at all possible
@@ -80,7 +80,7 @@ void unhookHandler() {
if (--hookedUpCount > 0) {
return;
}
-struct sigaction sa {};
+struct sigaction sa{};
// Setup the sighub handler
sa.sa_handler = SIG_DFL;
// Restart the system call, if at all possible
@@ -273,7 +273,7 @@ void FatalSignalHandler::installFatalSignalHandlers() {
return;
}
fatalSignalHandlersInstalled = true;
-struct sigaction sa {};
+struct sigaction sa{};
sigemptyset(&sa.sa_mask);
// Since we'll be in an exiting situation it's possible there's memory
// corruption, so make our own stack just in case.
@@ -88,7 +88,7 @@ class C10_API FatalSignalHandler {
bool fatalSignalHandlersInstalled;
// We need to hold a reference to call the previous SIGUSR2 handler in case
// we didn't signal it
-struct sigaction previousSigusr2 {};
+struct sigaction previousSigusr2{};
// Flag dictating whether the SIGUSR2 handler falls back to previous handlers
// or is intercepted in order to print a stack trace.
std::atomic<bool> fatalSignalReceived;
@@ -87,7 +87,7 @@ struct DummyChunkDataReader : public datasets::ChunkDataReader<int> {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};

const static size_t chunk_count_ = 3;
// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-magic-numbers,cppcoreguidelines-avoid-c-arrays)
@@ -1479,7 +1479,7 @@ TEST(DataLoaderTest, StatefulDatasetWithNoWorkers) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1517,7 +1517,7 @@ TEST(DataLoaderTest, StatefulDatasetWithManyWorkers) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
std::mutex mutex;
@@ -1556,7 +1556,7 @@ TEST(DataLoaderTest, StatefulDatasetWithMap) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1605,7 +1605,7 @@ TEST(DataLoaderTest, StatefulDatasetWithCollate) {
void reset() override {
counter = 0;
}
-void save(torch::serialize::OutputArchive& archive) const override{};
+void save(torch::serialize::OutputArchive& archive) const override {};
void load(torch::serialize::InputArchive& archive) override {}
int counter = 0;
};
@@ -1747,7 +1747,7 @@ TEST(DataLoaderTest, ChunkDataSetWithEmptyBatch) {
return 1;
};

-void reset() override{};
+void reset() override {};
};

const size_t prefetch_count = 1;
@@ -1791,7 +1791,7 @@ TEST(DataLoaderTest, ChunkDataSetGetBatchWithUnevenBatchSize) {
return 2;
};

-void reset() override{};
+void reset() override {};
};

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
@@ -1936,7 +1936,7 @@ TEST(DataLoaderTest, ChunkDatasetSave) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
BatchType batch_data_ = BatchType(chunk_size, 0);
};

@@ -2115,7 +2115,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {

class S : public samplers::Sampler<> {
public:
-explicit S(size_t size) : size_(size), index_(0){};
+explicit S(size_t size) : size_(size), index_(0) {};

void reset(std::optional<size_t> new_size = std::nullopt) override {
if (new_size.has_value()) {
@@ -2170,7 +2170,7 @@ TEST(DataLoaderTest, ChunkDatasetCrossChunkShuffle) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
size_t chunk_count_;
};

@@ -2258,7 +2258,7 @@ TEST(DataLoaderTest, CustomPreprocessPolicy) {
return chunk_count_;
};

-void reset() override{};
+void reset() override {};
size_t chunk_count_;
};
@@ -127,9 +127,7 @@ TEST_F(ModulesTest, Conv2dSameStrided) {
[&] { Conv2d model_invalid(options.stride(2)); }(),
"padding='same' is not supported for strided convolutions");
ASSERT_THROWS_WITH(
-[&] {
-Conv2d model_invalid(options.stride({1, 2}));
-}(),
+[&] { Conv2d model_invalid(options.stride({1, 2})); }(),
"padding='same' is not supported for strided convolutions");
}

@@ -181,9 +179,7 @@ TEST_F(ModulesTest, Conv3dSameStrided) {
[&] { Conv3d model_invalid(options.stride(2)); }(),
"padding='same' is not supported for strided convolutions");
ASSERT_THROWS_WITH(
-[&] {
-Conv3d model_invalid(options.stride({1, 2, 1}));
-}(),
+[&] { Conv3d model_invalid(options.stride({1, 2, 1})); }(),
"padding='same' is not supported for strided convolutions");
}

@@ -920,7 +920,9 @@ TEST(TensorTest, Arange) {
}

TEST(TensorTest, PrettyPrintTensorDataContainer) {
-{ ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1"); }
+{
+ASSERT_EQ(c10::str(torch::detail::TensorDataContainer(1.1)), "1.1");
+}
{
ASSERT_EQ(
c10::str(torch::detail::TensorDataContainer({1.1, 2.2})), "{1.1, 2.2}");
@@ -1033,9 +1033,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWrites) {
auto ops = torch::RegisterOperators().op(
"uses::list",
torch::RegisterOperators::options()
-.catchAllKernel([](torch::List<at::Tensor> in) {
-return torch::rand({2, 3});
-})
+.catchAllKernel(
+[](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
// Write to the inside of a list. Check that we can't reorder a
// print across it.
@@ -1073,9 +1072,8 @@ TEST(ContainerAliasingTest, MovesAcrossContainedWritesNested) {
auto ops = torch::RegisterOperators().op(
"uses::list",
torch::RegisterOperators::options()
-.catchAllKernel([](torch::List<at::Tensor> in) {
-return torch::rand({2, 3});
-})
+.catchAllKernel(
+[](torch::List<at::Tensor> in) { return torch::rand({2, 3}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
// Write to the inside of a list. Check that we can't reorder a
// print across it.
@@ -1257,9 +1255,8 @@ TEST(AliasRegistrationTest, ConservativeWithInferredSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand1",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand1");
auto graph = std::make_shared<Graph>();
@@ -1274,9 +1271,8 @@ TEST(AliasRegistrationTest, ConservativeWithSpecifiedSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand2(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand2");
auto graph = std::make_shared<Graph>();
@@ -1291,9 +1287,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError) {
auto registry = torch::RegisterOperators().op(
"foo::rand3(Tensor(a) arg1) -> Tensor(b)",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));

const auto rand_op = Symbol::fromQualString("foo::rand3");
@@ -1312,9 +1307,8 @@ TEST(AliasRegistrationTest, ConservativeWithAliasingAnnotationsShouldError2) {
auto registry = torch::RegisterOperators().op(
"foo::rand4(Tensor(a) arg1) -> Tensor(a)",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::CONSERVATIVE));
const auto rand_op = Symbol::fromQualString("foo::rand4");
auto graph = std::make_shared<Graph>();
@@ -1334,9 +1328,8 @@ TEST(AliasRegistrationTest, FromSchemaWithInferredSchemaShouldError) {
torch::RegisterOperators().op(
"foo::rand5",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
},
"Tried to register operator foo::rand5(Tensor _0) -> Tensor _0 with AliasAnalysisKind::FROM_SCHEMA, but the schema is inferred");
@@ -1346,9 +1339,8 @@ TEST(AliasRegistrationTest, FromSchemaInferredPure) {
auto registry = torch::RegisterOperators().op(
"foo::rand6(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::FROM_SCHEMA));
const auto rand_op = Symbol::fromQualString("foo::rand6");
auto graph = std::make_shared<Graph>();
@@ -1395,9 +1387,8 @@ TEST(AliasRegistrationTest, PureNoSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand9",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
const auto rand_op = Symbol::fromQualString("foo::rand9");
auto graph = std::make_shared<Graph>();
@@ -1412,9 +1403,8 @@ TEST(AliasRegistrationTest, PureWithSchema) {
auto registry = torch::RegisterOperators().op(
"foo::rand10(Tensor arg1) -> Tensor",
torch::RegisterOperators::options()
-.catchAllKernel([](at::Tensor) -> at::Tensor {
-return at::rand({2, 2});
-})
+.catchAllKernel(
+[](at::Tensor) -> at::Tensor { return at::rand({2, 2}); })
.aliasAnalysis(AliasAnalysisKind::PURE_FUNCTION));
const auto rand_op = Symbol::fromQualString("foo::rand10");
auto graph = std::make_shared<Graph>();
|
@ -121,14 +121,10 @@ TEST(AutodiffTest, ADFormulas) {
|
|||||||
{"t", unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
|
{"t", unary_pointwise_2d, [](const VL& v) -> VL { return {v[0].t()}; }},
|
||||||
{"view",
|
{"view",
|
||||||
unary_pointwise_2d,
|
unary_pointwise_2d,
|
||||||
[](const VL& v) -> VL {
|
[](const VL& v) -> VL { return {v[0].view({3, 2})}; }},
|
||||||
return {v[0].view({3, 2})};
|
|
||||||
}},
|
|
||||||
{"expand",
|
{"expand",
|
||||||
{{2, 1}},
|
{{2, 1}},
|
||||||
[](const VL& v) -> VL {
|
[](const VL& v) -> VL { return {v[0].expand({2, 3})}; }},
|
||||||
return {v[0].expand({2, 3})};
|
|
||||||
}},
|
|
||||||
{"mm",
|
{"mm",
|
||||||
{{10, 12}, {12, 15}},
|
{{10, 12}, {12, 15}},
|
||||||
[](const VL& v) -> VL { return {v[0].mm(v[1])}; }},
|
[](const VL& v) -> VL { return {v[0].mm(v[1])}; }},
|
||||||
|
@ -863,8 +863,12 @@ void checkScopeCallbacks() {
|
|||||||
|
|
||||||
{
|
{
|
||||||
RECORD_TORCHSCRIPT_FUNCTION("test_method", {});
|
RECORD_TORCHSCRIPT_FUNCTION("test_method", {});
|
||||||
{ RECORD_FUNCTION("test_function", {}); }
|
{
|
||||||
{ RECORD_USER_SCOPE("test_user_scope"); }
|
RECORD_FUNCTION("test_function", {});
|
||||||
|
}
|
||||||
|
{
|
||||||
|
RECORD_USER_SCOPE("test_user_scope");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
TORCH_CHECK(!bad_scope);
|
TORCH_CHECK(!bad_scope);
|
||||||
@@ -1057,7 +1061,9 @@ TEST(RecordFunctionTest, RecordFunctionGuard) {
RECORD_USER_SCOPE("C");
}
}
-{ RECORD_USER_SCOPE("D"); }
+{
+RECORD_USER_SCOPE("D");
+}
}
}
TORCH_CHECK(fn_names.size() == 1);
@@ -1084,7 +1090,9 @@ TEST(RecordFunctionTest, Callbacks) {
add_remove_test_add_cb<2>();
auto h3 = add_remove_test_add_cb<3>();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 3);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 1) != ids.end());
@@ -1094,7 +1102,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
removeCallback(h1);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 2);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1103,7 +1113,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
removeCallback(h3);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
@@ -1115,7 +1127,9 @@ TEST(RecordFunctionTest, Callbacks) {
ids.clear();
add_remove_test_add_cb<1>();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1128,7 +1142,9 @@ TEST(RecordFunctionTest, Callbacks) {
return nullptr;
}));

-{ RECORD_USER_SCOPE("test_thread"); }
+{
+RECORD_USER_SCOPE("test_thread");
+}
});
th.join();
TORCH_CHECK(ids.size() == 2);
@@ -1136,7 +1152,9 @@ TEST(RecordFunctionTest, Callbacks) {
TORCH_CHECK(std::find(ids.begin(), ids.end(), 2) != ids.end());
ids.clear();

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1167,7 +1185,9 @@ TEST(RecordFunctionTest, Callbacks) {
TORCH_CHECK(ctx->b == "test_str");
}));

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

TORCH_CHECK(ids.size() == 1);
TORCH_CHECK(ids[0] == 1);
@@ -1193,7 +1213,9 @@ TEST(RecordFunctionTest, Callbacks) {
}));

// Will call both global and thread local callbacks.
-{ RECORD_USER_SCOPE("test_thread"); }
+{
+RECORD_USER_SCOPE("test_thread");
+}
});
ctx_th.join();
TORCH_CHECK(ids.size() == 2);
@@ -1216,21 +1238,27 @@ TEST(RecordFunctionTest, ShouldRun) {
return nullptr;
}));

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_TRUE(ran) << "first run didn't happen";
ran = false;

disableCallback(handle);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_FALSE(ran) << "second run happened but shouldn't have";
ran = false;

reenableCallback(handle);

-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}

EXPECT_TRUE(ran) << "run after re-enable didn't happen";
ran = false;
@@ -1273,7 +1301,9 @@ TEST(RecordFunctionTest, Basic) {
return nullptr;
})
.needsIds(true));
-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}
TORCH_CHECK(has_ids);
clearCallbacks();
has_ids = false;
@@ -1282,7 +1312,9 @@ TEST(RecordFunctionTest, Basic) {
has_ids = fn.handle() > 0;
return nullptr;
}));
-{ RECORD_USER_SCOPE("test"); }
+{
+RECORD_USER_SCOPE("test");
+}
TORCH_CHECK(!has_ids);
clearCallbacks();
}
@@ -144,7 +144,9 @@ TEST(RecordFunctionTest, CallOrder) {
#undef REGISTER_CALLBACK

RECORD_FUNCTION("Outer", {});
-{ RECORD_FUNCTION("Inner", {}); }
+{
+RECORD_FUNCTION("Inner", {});
+}

at::clearCallbacks();
ASSERT_FALSE(at::hasCallbacks());
@@ -716,14 +716,13 @@ TEST(ExternalCall, UnaryFloat) {
std::string,
std::vector<ExprHandle>>;
std::vector<Test> tests = {};
-tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
-{1, 64, 8, 9},
-{1, 64, 5, 7},
-[](at::Tensor x) {
-return at::adaptive_avg_pool2d(x, {5, 7});
-},
-"nnc_aten_adaptive_avg_pool2d",
-toExprHandleVec({5, 7})});
+tests.push_back(Test{
+// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
+{1, 64, 8, 9},
+{1, 64, 5, 7},
+[](at::Tensor x) { return at::adaptive_avg_pool2d(x, {5, 7}); },
+"nnc_aten_adaptive_avg_pool2d",
+toExprHandleVec({5, 7})});
tests.push_back(Test{// NOLINTNEXTLINE(cppcoreguidelines-avoid-magic-numbers)
{100, 200},
{100},
@@ -8,16 +8,16 @@
],
"clang-format": {
"Darwin-arm": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/17.0.6/clang-format",
-"hash": "47c47f3c8275fd6e25d07128ef9a655d3f898eae6a59a7c7a801967871bdb2f7"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-arm/19.1.4/clang-format",
+"hash": "f0da3ecf0ab1e9b50e8c27bd2d7ca0baa619e2f4b824b35d79d46356581fa552"
},
"Darwin-i386": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/17.0.6/clang-format",
-"hash": "23423cbe62feb535c05c88e6f47e420ca2777603f90dff4d33d19b6f5177a79e"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/macos-i386/19.1.4/clang-format",
+"hash": "f5eb5037b9aa9d1d2de650fb2e0fe1a2517768a462fae8e98791a67b698302f4"
},
"Linux": {
-"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/17.0.6/clang-format",
-"hash": "920159a0fafc7c65f6819e8a0b739ecc8e655f50f20a3a1db975a3473b86431b"
+"download_url": "https://oss-clang-format.s3.us-east-2.amazonaws.com/linux64/19.1.4/clang-format",
+"hash": "bfa9ef6eccb372f79ffcb6196af966fd84519ea9567f5ae7b6ad30208cd82109"
}
},
"clang-tidy": {
@@ -37,7 +37,7 @@ using namespace torch;
auto _w = \
write(STDERR_FILENO, ERROR_MSG, sizeof(ERROR_MSG) / sizeof(char)); \
(void)_w; \
-struct sigaction sa {}; \
+struct sigaction sa{}; \
sa.sa_handler = SIG_DFL; \
sa.sa_flags = 0; \
if (sigemptyset(&sa.sa_mask) != 0 || \
@@ -54,7 +54,7 @@ static void setSignalHandler(
int signal,
void (*handler)(int, siginfo_t*, void*),
struct sigaction* old_sa_ptr) {
-struct sigaction sa {};
+struct sigaction sa{};
sa.sa_sigaction = handler;
sa.sa_flags = SA_RESTART | SA_SIGINFO | SA_NOCLDSTOP | SA_NODEFER;
if (sigemptyset(&sa.sa_mask) != 0 ||
@@ -92,7 +92,7 @@ static void handler_SIGTERM(int sig, siginfo_t* info, void* ctx) {
if (info->si_pid == getppid()) {
_exit(EXIT_SUCCESS);
}
-struct sigaction sa {};
+struct sigaction sa{};
sa.sa_handler = SIG_DFL;
sa.sa_flags = 0;
if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGTERM, &sa, nullptr) != 0) {
@@ -40,10 +40,9 @@ struct has_forward {

template <typename Head = void, typename... Tail>
constexpr bool check_not_lvalue_references() {
-return (
-!std::is_lvalue_reference_v<Head> ||
-std::is_const_v<std::remove_reference_t<
-Head>>)&&check_not_lvalue_references<Tail...>();
+return (!std::is_lvalue_reference_v<Head> ||
+std::is_const_v<std::remove_reference_t<Head>>) &&
+check_not_lvalue_references<Tail...>();
}

template <>
@@ -59,7 +59,7 @@ namespace {
// in data parallel, and should not be exposed as a user API.
struct ReduceAdd : public autograd::Node {
explicit ReduceAdd(const at::Device& destination_device)
-: destination_device_(destination_device){};
+: destination_device_(destination_device) {};
~ReduceAdd() override = default;

// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
@@ -71,7 +71,7 @@ template <
typename T,
typename ValueT,
typename ParamsT,
-ValueT ParamsT::*ptr,
+ValueT ParamsT::* ptr,
typename ConvertArgT,
PyObject* (*Convert)(ConvertArgT)>
static PyObject* getTupleAttr(PyObject* obj, void* _unused) {
@@ -93,7 +93,7 @@ template <
typename T,
typename ValueT,
typename ParamsT,
-ValueT ParamsT::*ptr,
+ValueT ParamsT::* ptr,
typename ConvertArgT,
PyObject* (*Convert)(ConvertArgT)>
static PyObject* getValueAttr(PyObject* obj, void* _unused) {
@@ -41,10 +41,10 @@ struct TORCH_API JitDecompInterface {
TORCH_API void setJitDecompImpl(JitDecompInterface* impl);
TORCH_API JitDecompInterface* getJitDecompImpl();

-struct TORCH_API JitDecompRegisterer {
-explicit JitDecompRegisterer(JitDecompInterface* impl) {
-setJitDecompImpl(impl);
+struct TORCH_API JitDecompRegisterer{explicit JitDecompRegisterer(
+JitDecompInterface * impl){setJitDecompImpl(impl);
+} // namespace torch::autograd::impl
}
-};
+;

} // namespace torch::autograd::impl
@@ -1625,7 +1625,7 @@ using setter = int (*)(PyObject*, PyObject*, void*);

namespace {

-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
PyObject* getObject(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
PyObject* value = self->*ptr;
@@ -1636,7 +1636,7 @@ PyObject* getObject(PyObject* obj, void* _unused) {
return value;
}

-template <PyObject* THPFunction::*ptr>
+template <PyObject* THPFunction::* ptr>
int setObject(PyObject* obj, PyObject* value, void* _unused) {
auto self = (THPFunction*)obj;
if (value == Py_None) {
@@ -1648,13 +1648,13 @@ int setObject(PyObject* obj, PyObject* value, void* _unused) {
return 0;
}

-template <typename M, M THPFunction::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M THPFunction::* ptr, PyObject* (*Convert)(long)>
PyObject* getMember(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
return Convert(self->*ptr);
}

-template <typename M, M autograd::Node::*ptr, PyObject* (*Convert)(long)>
+template <typename M, M autograd::Node::* ptr, PyObject* (*Convert)(long)>
PyObject* getImplMember(PyObject* obj, void* _unused) {
auto self = (THPFunction*)obj;
return Convert(self->cdata.*ptr);
@@ -37,7 +37,7 @@ class CUDASymmetricMemory : public SymmetricMemory {
int rank,
int world_size);

-~CUDASymmetricMemory() override{};
+~CUDASymmetricMemory() override {};

std::vector<void*> get_buffer_ptrs() override;
std::vector<void*> get_signal_pad_ptrs() override;
@@ -85,14 +85,16 @@ void IpcChannel::send_fd(int dst_pid, int fd) {
memset(cbuf, 0, sizeof(cbuf));

// Create message header
-struct msghdr msg {
+struct msghdr msg{
// destination socket address and size of it
// message content in msg_iov and number of such structs (1 in our case)
// auxiliary data with the value of fd and size of it
-.msg_name = (void*)&addr, .msg_namelen = sizeof(struct sockaddr_un),
-.msg_iov = &io, .msg_iovlen = 1, .msg_control = cbuf,
-.msg_controllen = sizeof(cbuf)
-};
+.msg_name = (void*)&addr,
+.msg_namelen = sizeof(struct sockaddr_un),
+.msg_iov = &io,
+.msg_iovlen = 1,
+.msg_control = cbuf,
+.msg_controllen = sizeof(cbuf)};

// This points to the first control message header
// With SCM_RIGHTS we let the kernel know that we are passing file
@@ -136,25 +136,26 @@ IMPL_REDUCE(CPU)
 IMPL_REDUCE(CUDA)
 IMPL_REDUCE(PrivateUse1)

 #define IMPL_BROADCAST(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   broadcast_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       int64_t root_rank, \
       int64_t root_tensor, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> broadcast( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->broadcast( \
         tensor_vec, \
         BroadcastOptions{ \
             root_rank, \
             root_tensor, \
             std::chrono::milliseconds(timeout), \
             asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }

 IMPL_BROADCAST(CPU)
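The `IMPL_*` macro hunks in this file all follow the same pattern: the old output wrapped the member call as `getBackend(...) -> broadcast(`, with spaces around the arrow at the wrap point, while the new output breaks before the arrow and writes it without surrounding spaces. A small self-contained sketch of that wrapping style, with placeholder types rather than the real ProcessGroup API:

    #include <memory>

    struct Backend {
      void broadcast(int root) { (void)root; }
    };

    struct Registry {
      std::shared_ptr<Backend> getBackend(int /*device*/) {
        return std::make_shared<Backend>();
      }
    };

    int main() {
      Registry process_group;
      // Old wrap style:  process_group.getBackend(0) -> broadcast(0);
      // New wrap style breaks before the arrow and keeps it unspaced:
      process_group.getBackend(0)
          ->broadcast(0);
      return 0;
    }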
@@ -164,22 +165,25 @@ IMPL_BROADCAST(PrivateUse1)
 // Return input tensors as output tensors to make inplace allreduce look like
 // a functional API, so that make_fx can correctly build the dependencies in
 // the graph later.
 #define IMPL_ALLREDUCE(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   allreduce_##DEV( \
       at::TensorList tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       const std::optional<at::Tensor>& sparse_indices, \
       bool asyncOp, \
       int64_t timeout) { \
     auto tensor_vec = tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allreduce( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allreduce( \
         tensor_vec, \
         AllreduceOptions{ \
-            *reduce_op.get(), std::chrono::milliseconds(timeout), asyncOp}); \
+            *reduce_op.get(), \
+            std::chrono::milliseconds(timeout), \
+            asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(tensor_vec), work); \
   }

 IMPL_ALLREDUCE(CPU)
@@ -217,10 +221,13 @@ IMPL_ALLREDUCE_COALESCED(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> allgather( \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(output_tensors), \
-        input_tensors_vec, \
-        AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->allgather( \
+                        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+                            output_tensors), \
+                        input_tensors_vec, \
+                        AllgatherOptions{ \
+                            std::chrono::milliseconds(timeout), asyncOp}); \
     return std:: \
         tuple<std::vector<std::vector<at::Tensor>>, c10::intrusive_ptr<Work>>( \
             output_tensors, work); \
@@ -231,20 +238,21 @@ IMPL_ALLGATHER(CPU)
 IMPL_ALLGATHER(CUDA)
 IMPL_ALLGATHER(PrivateUse1)

 #define IMPL__ALLGATHER_BASE(DEV) \
   std::tuple<at::Tensor, c10::intrusive_ptr<Work>> _allgather_base_##DEV( \
       at::Tensor& output_tensor, \
       at::Tensor& input_tensor, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> _allgather_base( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->_allgather_base( \
         output_tensor, \
         input_tensor, \
-        AllgatherOptions{std::chrono::milliseconds(timeout), asyncOp}); \
+        AllgatherOptions{ \
+            std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<at::Tensor, c10::intrusive_ptr<Work>>( \
         output_tensor, work); \
   }

 IMPL__ALLGATHER_BASE(CPU)
@@ -289,26 +297,27 @@ IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CPU)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(CUDA)
 IMPL_ALLGATHER_INTO_TENSOR_COALESCED(PrivateUse1)

 #define IMPL_REDUCE_SCATTER(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   reduce_scatter_##DEV( \
       const at::TensorList& output_tensors, \
       const std::vector<std::vector<at::Tensor>>& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       const c10::intrusive_ptr<ReduceOp>& reduce_op, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = \
-        process_group->getBackend(c10::DeviceType::DEV) -> reduce_scatter( \
+    auto work = process_group->getBackend(c10::DeviceType::DEV) \
+                    ->reduce_scatter( \
         output_tensors_vec, \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
+        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+            input_tensors), \
         ReduceScatterOptions{ \
             *reduce_op.get(), \
             std::chrono::milliseconds(timeout), \
             asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         output_tensors_vec, work); \
   }

 IMPL_REDUCE_SCATTER(CPU)
@@ -324,7 +333,7 @@ IMPL_REDUCE_SCATTER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto work = process_group->getBackend(c10::DeviceType::DEV) \
-                    -> _reduce_scatter_base( \
+                    ->_reduce_scatter_base( \
         output_tensor, \
         input_tensor, \
         ReduceScatterOptions{ \
@@ -393,11 +402,14 @@ IMPL_GATHER(PrivateUse1)
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> scatter( \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->scatter( \
         output_tensors_vec, \
-        const_cast<std::vector<std::vector<at::Tensor>>&>(input_tensors), \
+        const_cast<std::vector<std::vector<at::Tensor>>&>( \
+            input_tensors), \
         ScatterOptions{ \
             root_rank, std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }
@@ -406,22 +418,24 @@ IMPL_SCATTER(CPU)
 IMPL_SCATTER(CUDA)
 IMPL_SCATTER(PrivateUse1)

 #define IMPL_ALLTOALL(DEV) \
   std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>> \
   alltoall_##DEV( \
       const at::TensorList& output_tensors, \
       const at::TensorList& input_tensors, \
       const c10::intrusive_ptr<ProcessGroup>& process_group, \
       bool asyncOp, \
       int64_t timeout) { \
     auto output_tensors_vec = output_tensors.vec(); \
     auto input_tensors_vec = input_tensors.vec(); \
-    auto work = process_group->getBackend(c10::DeviceType::DEV) -> alltoall( \
+    auto work = \
+        process_group->getBackend(c10::DeviceType::DEV) \
+            ->alltoall( \
         output_tensors_vec, \
         input_tensors_vec, \
         AllToAllOptions{std::chrono::milliseconds(timeout), asyncOp}); \
     return std::tuple<std::vector<at::Tensor>, c10::intrusive_ptr<Work>>( \
         std::move(output_tensors_vec), work); \
   }

 IMPL_ALLTOALL(CPU)
@@ -435,7 +435,7 @@ void socketInitialize() {
 // gracefully fall back to an alternative if it doesn't.
 bool doesHostnameResolveToUsableAddress(const std::string& hostname) {
   socketInitialize();
-  struct addrinfo hints {};
+  struct addrinfo hints{};
   hints.ai_family = AF_UNSPEC;
   hints.ai_socktype = SOCK_STREAM;
   struct addrinfo* result = nullptr;
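This hunk, and several that follow (`struct stat rc {};`, `struct stat statbuf {};`, `struct perf_event_attr attr {};`, and so on), all apply the same rule: the space between a variable name and an empty braced initializer is removed when the declaration spells out `struct`. A portable sketch of the rule with a stand-in type, since the real declarations depend on POSIX headers:

    struct Options {
      int verbosity = 0;
    };

    int main() {
      // Old output:  struct Options opts {};
      // New output:  struct Options opts{};
      struct Options opts{};
      return opts.verbosity;
    }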
@@ -155,7 +155,7 @@ class UvTcpSocket : public UvHandle {
   }

   void startRead() {
-    struct ::sockaddr_storage addr {};
+    struct ::sockaddr_storage addr{};
     int addrLen{sizeof(struct ::sockaddr_storage)};

     if (int err = uv_tcp_getpeername(
@@ -263,7 +263,7 @@ class UvTcpServer : public UvTcpSocket {
     auto res = c10::make_intrusive<UvTcpServer>(loop);
     res->handleReady();
     try {
-      struct sockaddr_storage addr {};
+      struct sockaddr_storage addr{};
       int uv_res = 0;
       if (useIpv6) {
         uv_res = uv_ip6_addr("::", port, (struct sockaddr_in6*)&addr);
@@ -40,5 +40,5 @@ RegisterHandler tracebackHandler{

       res.setContent(std::move(file_contents), "text/plain");
     }};
-}
+} // namespace
 } // namespace c10d::control_plane
@@ -631,8 +631,8 @@ PyObject* rpc_init(PyObject* _unused, PyObject* noargs) {
           py::call_guard<py::gil_scoped_release>())
       .def(
           "_get_device_map",
-          (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst) const) &
-              TensorPipeAgent::getDeviceMap,
+          (DeviceMap(TensorPipeAgent::*)(const WorkerInfo& dst)
+               const)&TensorPipeAgent::getDeviceMap,
           py::call_guard<py::gil_scoped_release>())
       .def(
           "_get_backend_options",
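The `_get_device_map` binding uses a pointer-to-member-function cast to pick one overload of `getDeviceMap`; the updated formatter now glues the `&` to the closing parenthesis of that cast instead of leaving it dangling at the end of the line. A simplified, compilable sketch of the same disambiguation pattern (the `Agent` type here is a placeholder, not the real TensorPipeAgent):

    struct Agent {
      int getDeviceMap(int dst) const { return dst; }
      int getDeviceMap(int dst, int hint) const { return dst + hint; }
    };

    int main() {
      // The cast selects the single-argument const overload; note the '&'
      // attached directly to the cast, as in the new output above.
      auto fn = (int (Agent::*)(int) const)&Agent::getDeviceMap;
      Agent a;
      return (a.*fn)(0);
    }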
@@ -14,7 +14,7 @@ constexpr auto kInternalModule = "torch.distributed.rpc.internal";
 #define PROFILE_GIL_SCOPED_ACQUIRE \
   std::chrono::time_point<std::chrono::high_resolution_clock> startTime; \
   auto shouldProfileGIL = \
-      RpcAgent::getCurrentRpcAgent() -> isGILProfilingEnabled(); \
+      RpcAgent::getCurrentRpcAgent()->isGILProfilingEnabled(); \
   if (shouldProfileGIL) { \
     startTime = std::chrono::high_resolution_clock::now(); \
   } \
@@ -39,7 +39,7 @@ bool file_exists(const std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -217,7 +217,7 @@ bool recursive_rmdir(const std::string& path) {
   }

   struct dirent* entry = nullptr;
-  struct stat statbuf {};
+  struct stat statbuf{};
   bool success = true;

   // Iterate through directory entries
@@ -17,7 +17,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -123,13 +123,13 @@ getAOTIModelRunnerRegistry();
 // To register a new external backend in AOTI one needs to create an instance of
 // this struct. It is not thread-safe. Becase it is expected to be called during
 // the initialization of the program.
-struct TORCH_API RegisterAOTIModelRunner {
-  RegisterAOTIModelRunner(
-      const std::string& name,
-      CreateAOTIModelRunnerFunc create_aoti_model_runner_fn) {
+struct TORCH_API RegisterAOTIModelRunner{RegisterAOTIModelRunner(
+    const std::string& name,
+    CreateAOTIModelRunnerFunc create_aoti_model_runner_fn){
     getAOTIModelRunnerRegistry()[name] = create_aoti_model_runner_fn;
-  }
-};
+} // namespace torch::inductor
+}
+;

 } // namespace torch::inductor
 #endif
@@ -87,7 +87,7 @@ bool file_exists(std::string& path) {
 #ifdef _WIN32
   return fs::exists(path);
 #else
-  struct stat rc {};
+  struct stat rc{};
   return lstat(path.c_str(), &rc) == 0;
 #endif
 }
@@ -358,7 +358,7 @@ struct IndexValueVec {
     index = at::vec::VectorizedN<int64_t, NI>(0);
   };

-  IndexValueVec(){};
+  IndexValueVec() {};
 };

 template <
@@ -451,7 +451,7 @@ std::array<PyMethodDef, 2> StaticCudaLauncherMethods = {
 // We don't implement __new__ or __init__ because we're using it only as a
 // container for static methods.
 PyTypeObject StaticCudaLauncherType = {
     PyVarObject_HEAD_INIT(nullptr, 0)
     "torch._C._StaticCudaLauncher", // tp_name
     sizeof(PyObject), // tp_basicsize
     0, // tp_itemsize
@@ -45,12 +45,12 @@ TORCH_API void registerFusionBackend(
     at::Device::Type backend_type,
     FusedKernelConstructor ctor);
 TORCH_API bool hasFusionBackend(at::Device::Type backend_type);
-struct TORCH_API RegisterFusionBackend {
-  RegisterFusionBackend(
-      at::Device::Type backend_type,
-      FusedKernelConstructor ctor) {
+struct TORCH_API RegisterFusionBackend{RegisterFusionBackend(
+    at::Device::Type backend_type,
+    FusedKernelConstructor ctor){
     registerFusionBackend(backend_type, std::move(ctor));
-  }
-};
+} // namespace torch::jit::fuser
+}
+;

 } // namespace torch::jit::fuser
@@ -134,7 +134,7 @@ static inline std::tuple<std::shared_ptr<char>, size_t> get_file_content(
     // failed to open file, chances are it's no such file or directory.
     file_not_found_error();
   }
-  struct stat statbuf {};
+  struct stat statbuf{};
   fstat(fd, &statbuf);
   size_t size = statbuf.st_size;
   void* ptr = mmap(nullptr, statbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
@@ -131,153 +131,153 @@ namespace torch::jit::tensorexpr {

 c10::ArrayRef<SymbolAddress> getIntrinsicSymbols() {
   static SymbolAddress symbolAddresses[] = {
       {"log10f", reinterpret_cast<void*>(&log10f)},
       {"log1pf", reinterpret_cast<void*>(&log1pf)},
       {"logf", reinterpret_cast<void*>(&logf)},
       {"log2f", reinterpret_cast<void*>(&log2f)},
       {"expf", reinterpret_cast<void*>(&expf)},
       {"erff", reinterpret_cast<void*>(&erff)},
       {"cosf", reinterpret_cast<void*>(&cosf)},
       {"sinf", reinterpret_cast<void*>(&sinf)},
       {"tanf", reinterpret_cast<void*>(&tanf)},
       {"acosf", reinterpret_cast<void*>(&acosf)},
       {"asinf", reinterpret_cast<void*>(&asinf)},
       {"atanf", reinterpret_cast<void*>(&atanf)},
       {"coshf", reinterpret_cast<void*>(&coshf)},
       {"sinhf", reinterpret_cast<void*>(&sinhf)},
       {"tanhf", reinterpret_cast<void*>(&tanhf)},
       {"sqrtf", reinterpret_cast<void*>(&sqrtf)},
       {"fabsf", reinterpret_cast<void*>(&fabsf)},
       {"floorf", reinterpret_cast<void*>(&floorf)},
       {"ceilf", reinterpret_cast<void*>(&ceilf)},
       {"roundf", reinterpret_cast<void*>(&roundf)},
       {"truncf", reinterpret_cast<void*>(&truncf)},
       {"atan2f", reinterpret_cast<void*>(&atan2f)},
       {"fmodf", reinterpret_cast<void*>(&fmodf)},
       {"remainderf", reinterpret_cast<void*>(&remainderf)},

       // float -> half & half -> float conversions
       {"__gnu_h2f_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_to_fp32_value)},
       {"__gnu_f2h_ieee",
        reinterpret_cast<void*>(&c10::detail::fp16_ieee_from_fp32_value)},

 #if !defined(_MSC_VER) && defined(__x86_64__)
       // FP32 Sleef functions -- SSE
       {"Sleef_acosf4", reinterpret_cast<void*>(&Sleef_acosf4_u10)},
       {"Sleef_asinf4", reinterpret_cast<void*>(&Sleef_asinf4_u10)},
       {"Sleef_atanf4", reinterpret_cast<void*>(&Sleef_atanf4_u10)},
       {"Sleef_cosf4", reinterpret_cast<void*>(&Sleef_cosf4_u10)},
       {"Sleef_sinf4", reinterpret_cast<void*>(&Sleef_sinf4_u10)},
       {"Sleef_tanf4", reinterpret_cast<void*>(&Sleef_tanf4_u10)},
       {"Sleef_coshf4", reinterpret_cast<void*>(&Sleef_coshf4_u10)},
       {"Sleef_sinhf4", reinterpret_cast<void*>(&Sleef_sinhf4_u10)},
       {"Sleef_tanhf4", reinterpret_cast<void*>(&Sleef_tanhf4_u10)},
       {"Sleef_erff4", reinterpret_cast<void*>(&Sleef_erff4_u10)},
       {"Sleef_erfcf4", reinterpret_cast<void*>(&Sleef_erfcf4_u15)},
       {"Sleef_expf4", reinterpret_cast<void*>(&Sleef_expf4_u10)},
       {"Sleef_expm1f4", reinterpret_cast<void*>(&Sleef_expm1f4_u10)},
       {"Sleef_logf4", reinterpret_cast<void*>(&Sleef_logf4_u10)},
       {"Sleef_log2f4", reinterpret_cast<void*>(&Sleef_log2f4_u10)},
       {"Sleef_log10f4", reinterpret_cast<void*>(&Sleef_log10f4_u10)},
       {"Sleef_log1pf4", reinterpret_cast<void*>(&Sleef_log1pf4_u10)},
       {"Sleef_sqrtf4", reinterpret_cast<void*>(&Sleef_sqrtf4_u05)},
       {"Sleef_fabsf4", reinterpret_cast<void*>(&Sleef_fabsf4)},
       {"Sleef_floorf4", reinterpret_cast<void*>(&Sleef_floorf4)},
       {"Sleef_ceilf4", reinterpret_cast<void*>(&Sleef_ceilf4)},
       {"Sleef_truncf4", reinterpret_cast<void*>(&Sleef_truncf4)},
       {"Sleef_roundf4", reinterpret_cast<void*>(&Sleef_roundf4)},
       {"Sleef_lgammaf4", reinterpret_cast<void*>(&Sleef_lgammaf4_u10)},
       {"Sleef_atan2f4", reinterpret_cast<void*>(&Sleef_atan2f4_u10)},
       {"Sleef_powf4", reinterpret_cast<void*>(&Sleef_powf4_u10)},
       {"Sleef_fmodf4", reinterpret_cast<void*>(&Sleef_fmodf4)},

       // FP32 Sleef functions -- AVX2
       {"Sleef_acosf8", reinterpret_cast<void*>(&Sleef_acosf8_u10)},
       {"Sleef_asinf8", reinterpret_cast<void*>(&Sleef_asinf8_u10)},
       {"Sleef_atanf8", reinterpret_cast<void*>(&Sleef_atanf8_u10)},
       {"Sleef_cosf8", reinterpret_cast<void*>(&Sleef_cosf8_u10)},
       {"Sleef_sinf8", reinterpret_cast<void*>(&Sleef_sinf8_u10)},
       {"Sleef_tanf8", reinterpret_cast<void*>(&Sleef_tanf8_u10)},
       {"Sleef_coshf8", reinterpret_cast<void*>(&Sleef_coshf8_u10)},
       {"Sleef_sinhf8", reinterpret_cast<void*>(&Sleef_sinhf8_u10)},
       {"Sleef_tanhf8", reinterpret_cast<void*>(&Sleef_tanhf8_u10)},
       {"Sleef_erff8", reinterpret_cast<void*>(&Sleef_erff8_u10)},
       {"Sleef_erfcf8", reinterpret_cast<void*>(&Sleef_erfcf8_u15)},
       {"Sleef_expf8", reinterpret_cast<void*>(&Sleef_expf8_u10)},
       {"Sleef_expm1f8", reinterpret_cast<void*>(&Sleef_expm1f8_u10)},
       {"Sleef_logf8", reinterpret_cast<void*>(&Sleef_logf8_u10)},
       {"Sleef_log2f8", reinterpret_cast<void*>(&Sleef_log2f8_u10)},
       {"Sleef_log10f8", reinterpret_cast<void*>(&Sleef_log10f8_u10)},
       {"Sleef_log1pf8", reinterpret_cast<void*>(&Sleef_log1pf8_u10)},
       {"Sleef_sqrtf8", reinterpret_cast<void*>(&Sleef_sqrtf8_u05)},
       {"Sleef_fabsf8", reinterpret_cast<void*>(&Sleef_fabsf8)},
       {"Sleef_floorf8", reinterpret_cast<void*>(&Sleef_floorf8)},
       {"Sleef_ceilf8", reinterpret_cast<void*>(&Sleef_ceilf8)},
       {"Sleef_truncf8", reinterpret_cast<void*>(&Sleef_truncf8)},
       {"Sleef_roundf8", reinterpret_cast<void*>(&Sleef_roundf8)},
       {"Sleef_lgammaf8", reinterpret_cast<void*>(&Sleef_lgammaf8_u10)},
       {"Sleef_atan2f8", reinterpret_cast<void*>(&Sleef_atan2f8_u10)},
       {"Sleef_powf8", reinterpret_cast<void*>(&Sleef_powf8_u10)},
       {"Sleef_fmodf8", reinterpret_cast<void*>(&Sleef_fmodf8)},

       // FP64 Sleef functions -- SSE
       {"Sleef_acosd2", reinterpret_cast<void*>(&Sleef_acosd2_u10)},
       {"Sleef_asind2", reinterpret_cast<void*>(&Sleef_asind2_u10)},
       {"Sleef_atand2", reinterpret_cast<void*>(&Sleef_atand2_u10)},
       {"Sleef_cosd2", reinterpret_cast<void*>(&Sleef_cosd2_u10)},
       {"Sleef_sind2", reinterpret_cast<void*>(&Sleef_sind2_u10)},
       {"Sleef_tand2", reinterpret_cast<void*>(&Sleef_tand2_u10)},
       {"Sleef_coshd2", reinterpret_cast<void*>(&Sleef_coshd2_u10)},
       {"Sleef_sinhd2", reinterpret_cast<void*>(&Sleef_sinhd2_u10)},
       {"Sleef_tanhd2", reinterpret_cast<void*>(&Sleef_tanhd2_u10)},
       {"Sleef_erfd2", reinterpret_cast<void*>(&Sleef_erfd2_u10)},
       {"Sleef_erfcd2", reinterpret_cast<void*>(&Sleef_erfcd2_u15)},
       {"Sleef_expd2", reinterpret_cast<void*>(&Sleef_expd2_u10)},
       {"Sleef_expm1d2", reinterpret_cast<void*>(&Sleef_expm1d2_u10)},
       {"Sleef_logd2", reinterpret_cast<void*>(&Sleef_logd2_u10)},
       {"Sleef_log2d2", reinterpret_cast<void*>(&Sleef_log2d2_u10)},
       {"Sleef_log10d2", reinterpret_cast<void*>(&Sleef_log10d2_u10)},
       {"Sleef_log1pd2", reinterpret_cast<void*>(&Sleef_log1pd2_u10)},
       {"Sleef_sqrtd2", reinterpret_cast<void*>(&Sleef_sqrtd2_u05)},
       {"Sleef_fabsd2", reinterpret_cast<void*>(&Sleef_fabsd2)},
       {"Sleef_floord2", reinterpret_cast<void*>(&Sleef_floord2)},
       {"Sleef_ceild2", reinterpret_cast<void*>(&Sleef_ceild2)},
       {"Sleef_truncd2", reinterpret_cast<void*>(&Sleef_truncd2)},
       {"Sleef_roundd2", reinterpret_cast<void*>(&Sleef_roundd2)},
       {"Sleef_lgammad2", reinterpret_cast<void*>(&Sleef_lgammad2_u10)},
       {"Sleef_atan2d2", reinterpret_cast<void*>(&Sleef_atan2d2_u10)},
       {"Sleef_powd2", reinterpret_cast<void*>(&Sleef_powd2_u10)},
       {"Sleef_fmodd2", reinterpret_cast<void*>(&Sleef_fmodd2)},

       // FP64 Sleef functions -- AVX2
       {"Sleef_acosd4", reinterpret_cast<void*>(&Sleef_acosd4_u10)},
       {"Sleef_asind4", reinterpret_cast<void*>(&Sleef_asind4_u10)},
       {"Sleef_atand4", reinterpret_cast<void*>(&Sleef_atand4_u10)},
       {"Sleef_cosd4", reinterpret_cast<void*>(&Sleef_cosd4_u10)},
       {"Sleef_sind4", reinterpret_cast<void*>(&Sleef_sind4_u10)},
       {"Sleef_tand4", reinterpret_cast<void*>(&Sleef_tand4_u10)},
       {"Sleef_coshd4", reinterpret_cast<void*>(&Sleef_coshd4_u10)},
       {"Sleef_sinhd4", reinterpret_cast<void*>(&Sleef_sinhd4_u10)},
       {"Sleef_tanhd4", reinterpret_cast<void*>(&Sleef_tanhd4_u10)},
       {"Sleef_erfd4", reinterpret_cast<void*>(&Sleef_erfd4_u10)},
       {"Sleef_erfcd4", reinterpret_cast<void*>(&Sleef_erfcd4_u15)},
       {"Sleef_expd4", reinterpret_cast<void*>(&Sleef_expd4_u10)},
       {"Sleef_expm1d4", reinterpret_cast<void*>(&Sleef_expm1d4_u10)},
       {"Sleef_logd4", reinterpret_cast<void*>(&Sleef_logd4_u10)},
       {"Sleef_log2d4", reinterpret_cast<void*>(&Sleef_log2d4_u10)},
       {"Sleef_log10d4", reinterpret_cast<void*>(&Sleef_log10d4_u10)},
       {"Sleef_log1pd4", reinterpret_cast<void*>(&Sleef_log1pd4_u10)},
       {"Sleef_sqrtd4", reinterpret_cast<void*>(&Sleef_sqrtd4_u05)},
       {"Sleef_fabsd4", reinterpret_cast<void*>(&Sleef_fabsd4)},
       {"Sleef_floord4", reinterpret_cast<void*>(&Sleef_floord4)},
       {"Sleef_ceild4", reinterpret_cast<void*>(&Sleef_ceild4)},
       {"Sleef_truncd4", reinterpret_cast<void*>(&Sleef_truncd4)},
       {"Sleef_roundd4", reinterpret_cast<void*>(&Sleef_roundd4)},
       {"Sleef_lgammad4", reinterpret_cast<void*>(&Sleef_lgammad4_u10)},
       {"Sleef_atan2d4", reinterpret_cast<void*>(&Sleef_atan2d4_u10)},
       {"Sleef_powd4", reinterpret_cast<void*>(&Sleef_powd4_u10)},
       {"Sleef_fmodd4", reinterpret_cast<void*>(&Sleef_fmodd4)},
 #endif
   };
   return c10::ArrayRef<SymbolAddress>(symbolAddresses);
@@ -36,12 +36,11 @@ enum class C10_API_ENUM Aggregation {
   MIN = 6,
 };

-struct TORCH_API AggregationHash {
-  template <typename T>
-  std::size_t operator()(T t) const {
-    return static_cast<std::size_t>(t);
-  }
-};
+struct TORCH_API AggregationHash{template <typename T> std::size_t operator()(
+    T t) const {return static_cast<std::size_t>(t);
+} // namespace torch::monitor
+}
+;

 // aggregationName returns the human readable name corresponding to the
 // aggregation.
@@ -808,56 +808,58 @@ void generateForwardBackwardLink(

 void generateForwardBackwardLinks(
     std::unique_ptr<torch::profiler::impl::kineto::trace_t>& cpu_trace,
-    const std::vector<std::shared_ptr<Result>>& results){
+    const std::vector<std::shared_ptr<Result>>& results) {
 #ifndef USE_KINETO
 }
 #else // USE_KINETO
   TORCH_INTERNAL_ASSERT(cpu_trace->activities.size() == results.size());

   // startThreadId_seqNum to pointer of activity.
   // Low-16bits of startThreadId and low-48bits seqNum are concatenated into
   // one uint64_t variable as key.

-  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*> tidSeq2activity;
+  std::unordered_map<uint64_t, libkineto::GenericTraceActivity*>
+      tidSeq2activity;
   uint64_t fwd_bwd_link_id = 1;

-  using result_activity_t = std::pair<Result*, libkineto::GenericTraceActivity*>;
+  using result_activity_t =
+      std::pair<Result*, libkineto::GenericTraceActivity*>;
   std::vector<result_activity_t> torch_events;

   for (const auto idx : c10::irange(cpu_trace->activities.size())) {
     auto& profiler_result = results[idx];
     auto& activity = cpu_trace->activities[idx];

     // add information about an associated forward op, if a sequence number
     // is available (e.g. during training)

     profiler_result->visit_if_base<ExtraFields<EventType::TorchOp>>(
         [&](const auto& e) {
           if (e.sequence_number_ >= 0) {
             torch_events.emplace_back(profiler_result.get(), activity.get());
           }
         });
   }

   // We need to visit the events in chronological order.
   // So we sort them by end_time_ns_ before processing.
   std::sort(
       torch_events.begin(),
       torch_events.end(),
       [](const result_activity_t& left, const result_activity_t& right) {
         auto left_end_time =
             std::get<ExtraFields<EventType::TorchOp>>(left.first->extra_fields_)
                 .end_time_ns_;
-        auto right_end_time =
-            std::get<ExtraFields<EventType::TorchOp>>(right.first->extra_fields_)
-                .end_time_ns_;
+        auto right_end_time = std::get<ExtraFields<EventType::TorchOp>>(
+                                  right.first->extra_fields_)
+                                  .end_time_ns_;
         return left_end_time < right_end_time;
       });

   for (auto& [profiler_result, activity] : torch_events) {
     generateForwardBackwardLink(
         *profiler_result, fwd_bwd_link_id, *activity, tidSeq2activity);
   }
 }
 #endif // USE_KINETO
@@ -63,7 +63,7 @@ void PerfEvent::Init() {
     TORCH_CHECK(false, "Unsupported profiler event name: ", name_);
   }

-  struct perf_event_attr attr {};
+  struct perf_event_attr attr{};

   attr.size = sizeof(perf_event_attr);
   attr.type = it->second.first;
@@ -673,8 +673,8 @@ void initPythonBindings(PyObject* module) {
       {nullptr},
   };

-  static PyTypeObject RecordFunctionFast_Type = { PyVarObject_HEAD_INIT(nullptr,
-                                                  0)
+  static PyTypeObject RecordFunctionFast_Type = {
+      PyVarObject_HEAD_INIT(nullptr, 0)
   };

   RecordFunctionFast_Type.tp_name = "torch._C._profiler.RecordFunctionFast",
@@ -46,7 +46,7 @@ struct MemFile {
         "failed to open {}: {}",
         filename_,
         c10::utils::str_error(errno));
-    struct stat s {};
+    struct stat s{};
     if (-1 == fstat(fd_, &s)) {
       close(fd_); // destructors don't run during exceptions
       UNWIND_CHECK(
@@ -101,11 +101,10 @@
 #define THPBoolUtils_newReal(value) THPUtils_newReal_BOOL(value)
 #define THPBoolUtils_checkAccreal(object) THPUtils_checkReal_BOOL(object)
 #define THPBoolUtils_unpackAccreal(object) \
-  (int64_t) THPUtils_unpackReal_BOOL(object)
+  (int64_t)THPUtils_unpackReal_BOOL(object)
 #define THPBoolUtils_newAccreal(value) THPUtils_newReal_BOOL(value)
 #define THPLongUtils_checkReal(object) THPUtils_checkReal_INT(object)
-#define THPLongUtils_unpackReal(object) \
-  (int64_t) THPUtils_unpackReal_INT(object)
+#define THPLongUtils_unpackReal(object) (int64_t)THPUtils_unpackReal_INT(object)
 #define THPLongUtils_newReal(value) THPUtils_newReal_INT(value)
 #define THPIntUtils_checkReal(object) THPUtils_checkReal_INT(object)
 #define THPIntUtils_unpackReal(object) (int)THPUtils_unpackReal_INT(object)
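Here the space after a C-style cast inside a macro replacement list is dropped, and a short two-line macro is folded back onto its #define line. A tiny compilable sketch of the same cast spacing, with a hypothetical macro name standing in for the THPUtils helpers:

    #include <cstdint>

    // Stand-in for the unpack macros above; not part of the PyTorch headers.
    #define UNPACK_AS_INT64(x) (int64_t)(x)

    int main() {
      double value = 3.5;
      // Old output: "(int64_t) THPUtils_unpackReal_INT(object)" with a space
      // after the cast; the new output writes the cast flush against its
      // operand, as shown in the hunk above.
      int64_t n = UNPACK_AS_INT64(value);
      return static_cast<int>(n);
    }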
@@ -362,7 +362,7 @@ TORCH_API void THP_encodeBuffer<c10::complex<double>>(

 #define DEFINE_ENCODE(TYPE) \
   template TORCH_API void THP_encodeBuffer<TYPE>( \
-      uint8_t * dst, const TYPE* src, THPByteOrder order, size_t len);
+      uint8_t* dst, const TYPE* src, THPByteOrder order, size_t len);

 DEFINE_ENCODE(int16_t)
 DEFINE_ENCODE(int32_t)