From 9fff8155c362da777e7ce31b85fb2dc7cfced2d5 Mon Sep 17 00:00:00 2001
From: Yuanyuan Chen
Date: Mon, 6 Oct 2025 01:06:01 +0000
Subject: [PATCH] [2/N] Fix clang-tidy readability checks (#164652)

This PR applies clang-tidy readability checks to the jit sources and all headers in the code base. `readability-redundant-inline-specifier`, which detects redundant `inline` specifiers on function and variable declarations, is suppressed because enabling it would incur too many changes: there are many in-class method definitions that are marked `inline`. (An illustrative sketch of what this check flags appears after the first file diff below.)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164652
Approved by: https://github.com/Skylion007
--- .clang-tidy | 1 + aten/src/ATen/TensorIndexing.h | 2 +- aten/src/ATen/core/TransformationHelper.h | 2 +- aten/src/ATen/core/boxing/BoxedKernel_impl.h | 2 +- aten/src/ATen/core/boxing/KernelFunction_impl.h | 4 +--- aten/src/ATen/core/dispatch/Dispatcher.h | 2 +- aten/src/ATen/core/op_registration/op_allowlist.h | 2 +- .../ATen/core/op_registration/op_registration.h | 2 -- aten/src/ATen/cuda/CUDAGeneratorImpl.h | 6 +++--- aten/src/ATen/cuda/detail/DeviceThreadHandles.h | 2 +- aten/src/ATen/native/RangeUtils.h | 2 +- aten/src/ATen/native/UpSample.h | 1 - aten/src/ATen/native/cpu/AtomicAddFloat.h | 2 +- aten/src/ATen/native/cpu/BlasKernel.cpp | 2 +- aten/src/ATen/native/cpu/ReduceUtils.h | 1 - aten/src/ATen/native/cpu/SoftMaxKernel.cpp | 1 - aten/src/ATen/native/cpu/Unfold2d.cpp | 4 ++-- aten/src/ATen/native/cpu/int4mm_kernel.cpp | 4 ++-- aten/src/ATen/native/quantized/cpu/QuantUtils.h | 8 ++++---- .../quantized/cpu/kernels/QuantizedOpKernels.cpp | 10 +++++----- aten/src/ATen/ops/from_blob.h | 10 +++++----- c10/core/DeviceGuard.h | 2 +- c10/core/StreamGuard.h | 2 +- c10/core/impl/PyInterpreterHooks.h | 1 + c10/cuda/CUDACachingAllocator.h | 2 +- caffe2/utils/threadpool/WorkersPool.h | 2 +- .../csrc/api/include/torch/nn/options/activation.h | 14 +++++++------- .../csrc/api/include/torch/nn/options/batchnorm.h | 4 ++-- torch/csrc/api/include/torch/nn/options/conv.h | 4 ++-- .../csrc/api/include/torch/nn/options/embedding.h | 8 ++++---- .../api/include/torch/nn/options/instancenorm.h | 8 ++++---- torch/csrc/api/include/torch/nn/options/loss.h | 14 +++++++------- .../api/include/torch/nn/options/normalization.h | 8 ++++---- torch/csrc/api/include/torch/nn/options/pooling.h | 2 +- torch/csrc/api/include/torch/optim/adam.h | 2 +- torch/csrc/api/include/torch/optim/adamw.h | 2 +- torch/csrc/api/include/torch/optim/lbfgs.h | 6 +++--- torch/csrc/api/include/torch/optim/rmsprop.h | 4 ++-- torch/csrc/autograd/graph_task.h | 2 +- torch/csrc/autograd/utils/lambda_post_hook.h | 2 +- torch/csrc/distributed/c10d/FlightRecorder.hpp | 4 ++-- torch/csrc/distributed/c10d/NCCLUtils.hpp | 2 +- torch/csrc/distributed/c10d/ParamCommsUtils.hpp | 2 +- torch/csrc/distributed/c10d/ProcessGroup.hpp | 1 - .../distributed/c10d/ProcessGroupGlooDetail.hpp | 4 ++-- torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp | 10 +++++----- torch/csrc/distributed/c10d/TCPStore.hpp | 2 +- torch/csrc/distributed/c10d/TCPStoreBackend.hpp | 2 +- torch/csrc/distributed/c10d/Utils.hpp | 2 +- .../c10d/control_collectives/StoreCollectives.hpp | 2 +- torch/csrc/distributed/c10d/reducer.hpp | 4 ++-- torch/csrc/inductor/aoti_runtime/utils.h | 2 +- torch/csrc/jit/frontend/function_schema_parser.cpp | 2 +- torch/csrc/jit/frontend/lexer.h | 6 +----- torch/csrc/jit/frontend/sugared_value.cpp | 2 +- torch/csrc/jit/ir/ir.cpp | 2 +-
.../mobile/compatibility/model_compatibility.cpp | 9 +-------- .../jit/mobile/compatibility/model_compatibility.h | 2 +- .../mobile/compatibility/runtime_compatibility.cpp | 4 ---- torch/csrc/jit/mobile/flatbuffer_loader.h | 7 ------- torch/csrc/jit/mobile/import.cpp | 4 +--- torch/csrc/jit/mobile/interpreter.cpp | 2 +- torch/csrc/jit/mobile/parse_bytecode.cpp | 2 +- torch/csrc/jit/mobile/train/optim/sgd.cpp | 2 +- torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp | 3 +-- torch/csrc/jit/passes/onnx/helper.h | 3 --- .../csrc/jit/passes/onnx/shape_type_inference.cpp | 4 ++-- .../passes/quantization/quantization_patterns.h | 2 +- torch/csrc/jit/passes/tensorexpr_fuser.cpp | 1 - torch/csrc/jit/python/python_arg_flatten.cpp | 3 +-- torch/csrc/jit/runtime/instruction.h | 1 - torch/csrc/jit/runtime/register_prim_ops.cpp | 8 ++++---- torch/csrc/jit/runtime/static/impl.cpp | 14 +++++++------- torch/csrc/jit/runtime/static/impl.h | 8 ++++---- torch/csrc/jit/runtime/static/memory_planner.h | 6 +++--- torch/csrc/jit/serialization/export.cpp | 1 - torch/csrc/jit/serialization/export_module.cpp | 6 +++--- torch/csrc/jit/serialization/import.h | 6 ------ torch/csrc/jit/serialization/pickler_helper.h | 4 ++-- .../serialization/source_range_serialization.cpp | 4 +--- torch/csrc/jit/serialization/unpickler.cpp | 4 ++-- torch/csrc/jit/tensorexpr/cuda_codegen.cpp | 3 +-- torch/csrc/jit/tensorexpr/eval.h | 2 +- torch/csrc/jit/tensorexpr/kernel.cpp | 2 +- .../csrc/jit/tensorexpr/loopnest_randomization.cpp | 2 +- torch/csrc/jit/tensorexpr/operators/quantization.h | 7 ------- torch/csrc/profiler/collection.h | 2 +- torch/csrc/profiler/unwind/fde.h | 2 +- 88 files changed, 142 insertions(+), 196 deletions(-) diff --git a/.clang-tidy b/.clang-tidy index 3c96106fe53d..71ffdf8cb224 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -66,6 +66,7 @@ readability-simplify-subscript-expr, readability-string-compare, -readability-redundant-access-specifiers, -readability-redundant-control-flow, +-readability-redundant-inline-specifier, ' HeaderFilterRegex: '^(aten/|c10/|torch/).*$' WarningsAsErrors: '*' diff --git a/aten/src/ATen/TensorIndexing.h b/aten/src/ATen/TensorIndexing.h index a487589833e8..7785ca861f0e 100644 --- a/aten/src/ATen/TensorIndexing.h +++ b/aten/src/ATen/TensorIndexing.h @@ -214,7 +214,7 @@ inline Tensor applySlice( "step must be greater than zero"); // See NOTE [nested tensor size for indexing] - if (self_sizes.has_value() && self_sizes.value().size() > 0) { + if (self_sizes.has_value() && !self_sizes.value().empty()) { // Skip this optimization if we are tracing, as the trace may be polymorphic // over the shape of the `self` tensor, and we still want to record // the slice. 
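As context for the suppressed check, the following is a minimal, hypothetical sketch (not code from this patch, identifiers invented for illustration) of what `readability-redundant-inline-specifier` flags: a member function defined inside its class body is already implicitly inline, so an explicit `inline` specifier there is redundant.

// Hypothetical example; not taken from the PyTorch tree.
struct Accumulator {
  // Flagged: the definition is inside the class body, so it is implicitly
  // inline and the explicit specifier adds nothing.
  inline void add(int x) { total_ += x; }

  // Preferred form: same semantics without the redundant specifier.
  int total() const { return total_; }

 private:
  int total_ = 0;
};

Because this pattern is pervasive in the in-class method definitions mentioned in the commit message, the check is suppressed here rather than mechanically rewriting every such definition.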
diff --git a/aten/src/ATen/core/TransformationHelper.h b/aten/src/ATen/core/TransformationHelper.h index f81018a8e674..dad18bd019bb 100644 --- a/aten/src/ATen/core/TransformationHelper.h +++ b/aten/src/ATen/core/TransformationHelper.h @@ -117,7 +117,7 @@ C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) { template <> C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) { // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function - return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); + return median + sigma * at::tan(c10::pi * (val - 0.5)); } /** diff --git a/aten/src/ATen/core/boxing/BoxedKernel_impl.h b/aten/src/ATen/core/boxing/BoxedKernel_impl.h index 1960607c6bc8..331141bcc8c9 100644 --- a/aten/src/ATen/core/boxing/BoxedKernel_impl.h +++ b/aten/src/ATen/core/boxing/BoxedKernel_impl.h @@ -2,7 +2,7 @@ namespace c10 { -inline BoxedKernel::BoxedKernel() : functor_(), boxed_kernel_func_(nullptr) {} +inline BoxedKernel::BoxedKernel() : boxed_kernel_func_(nullptr) {} inline BoxedKernel::BoxedKernel( std::unique_ptr functor, diff --git a/aten/src/ATen/core/boxing/KernelFunction_impl.h b/aten/src/ATen/core/boxing/KernelFunction_impl.h index 672309ec19a2..bb981c1d4efd 100644 --- a/aten/src/ATen/core/boxing/KernelFunction_impl.h +++ b/aten/src/ATen/core/boxing/KernelFunction_impl.h @@ -20,9 +20,7 @@ make_unique_base(Args&&... args) { } // namespace detail inline KernelFunction::KernelFunction() - : boxed_kernel_func_(), - unboxed_kernel_func_(nullptr), - sym_unboxed_kernel_func_(nullptr) {} + : unboxed_kernel_func_(nullptr), sym_unboxed_kernel_func_(nullptr) {} inline KernelFunction::~KernelFunction() { if (tokens_) { diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h index 43eb0028c70f..3a65b288bf8e 100644 --- a/aten/src/ATen/core/dispatch/Dispatcher.h +++ b/aten/src/ATen/core/dispatch/Dispatcher.h @@ -96,7 +96,7 @@ class TORCH_API Dispatcher final { friend class TypedOperatorHandle; struct Guard final { - Guard() : alive(true), mutex() {} + Guard() : alive(true) {} std::atomic alive; std::mutex mutex; }; diff --git a/aten/src/ATen/core/op_registration/op_allowlist.h b/aten/src/ATen/core/op_registration/op_allowlist.h index 3e8e03f9fa4c..1f39ba4e3871 100644 --- a/aten/src/ATen/core/op_registration/op_allowlist.h +++ b/aten/src/ATen/core/op_registration/op_allowlist.h @@ -114,7 +114,7 @@ constexpr bool allowlist_contains(std::string_view allowlist, std::string_view i } next++; } else { - if (allowlist.substr(cur).compare(item) == 0) { + if (allowlist.substr(cur) == item) { return true; } break; diff --git a/aten/src/ATen/core/op_registration/op_registration.h b/aten/src/ATen/core/op_registration/op_registration.h index 7a44cfa49b07..d441269bf297 100644 --- a/aten/src/ATen/core/op_registration/op_registration.h +++ b/aten/src/ATen/core/op_registration/op_registration.h @@ -411,7 +411,6 @@ public: Options() : schemaOrName_(std::nullopt) - , kernels() , aliasAnalysisKind_(std::nullopt) {} @@ -420,7 +419,6 @@ public: struct KernelRegistrationConfig final { KernelRegistrationConfig() : dispatch_key(std::nullopt) - , func() , cpp_signature(std::nullopt) , inferred_function_schema(nullptr) {} diff --git a/aten/src/ATen/cuda/CUDAGeneratorImpl.h b/aten/src/ATen/cuda/CUDAGeneratorImpl.h index d31fde56ebc1..d4ab49382e7f 100644 --- a/aten/src/ATen/cuda/CUDAGeneratorImpl.h +++ b/aten/src/ATen/cuda/CUDAGeneratorImpl.h @@ -99,8 +99,8 @@ struct CUDAGeneratorState : public 
c10::intrusive_ptr_target { uint64_t offset_intragraph_; bool capturing_{}; std::unordered_set registered_graphs_; - at::TensorBase seed_extragraph_{}; - at::TensorBase offset_extragraph_{}; + at::TensorBase seed_extragraph_; + at::TensorBase offset_extragraph_; CUDAGeneratorState( uint64_t seed = default_rng_seed_val, @@ -167,7 +167,7 @@ struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl { CUDAGeneratorImpl* clone_impl() const override; c10::intrusive_ptr state_; - std::atomic_flag no_reset_rnn_state_{}; + std::atomic_flag no_reset_rnn_state_; }; namespace cuda::detail { diff --git a/aten/src/ATen/cuda/detail/DeviceThreadHandles.h b/aten/src/ATen/cuda/detail/DeviceThreadHandles.h index 1f80c863b639..71a344d281d2 100644 --- a/aten/src/ATen/cuda/detail/DeviceThreadHandles.h +++ b/aten/src/ATen/cuda/detail/DeviceThreadHandles.h @@ -122,7 +122,7 @@ struct DeviceThreadHandlePool : public std::enable_shared_from_this 0) { + if(!my_handles.empty()) { auto parent = weak_parent.lock(); if (!parent) { // If this thread exits after atexit handlers have completed, the diff --git a/aten/src/ATen/native/RangeUtils.h b/aten/src/ATen/native/RangeUtils.h index dcab86ca9a42..fd62b8e01329 100644 --- a/aten/src/ATen/native/RangeUtils.h +++ b/aten/src/ATen/native/RangeUtils.h @@ -47,7 +47,7 @@ int64_t compute_arange_size(const Scalar& start, const Scalar& end, const Scalar int64_t sgn = (xstep > 0) - (xstep < 0); size_d = std::ceil((xend - xstart + xstep - sgn) / xstep); } else { - size_d = std::ceil(static_cast(end.to() - start.to()) + size_d = std::ceil((end.to() - start.to()) / step.to()); } diff --git a/aten/src/ATen/native/UpSample.h b/aten/src/ATen/native/UpSample.h index 5b49fdd02954..d37ccbe2763d 100644 --- a/aten/src/ATen/native/UpSample.h +++ b/aten/src/ATen/native/UpSample.h @@ -4,7 +4,6 @@ #include #include -#include #include #include #include diff --git a/aten/src/ATen/native/cpu/AtomicAddFloat.h b/aten/src/ATen/native/cpu/AtomicAddFloat.h index 5b24ee4821c4..526f86d705b7 100644 --- a/aten/src/ATen/native/cpu/AtomicAddFloat.h +++ b/aten/src/ATen/native/cpu/AtomicAddFloat.h @@ -22,7 +22,7 @@ static inline void cpu_atomic_add_float(float* dst, float fvalue) old_value.floatV = *dst; new_value.floatV = old_value.floatV + fvalue; - unsigned* old_intV = (unsigned*)(&old_value.intV); + unsigned* old_intV = &old_value.intV; while (!std::atomic_compare_exchange_strong(dst_intV, old_intV, new_value.intV)) { #ifdef __aarch64__ __asm__ __volatile__("yield;" : : : "memory"); diff --git a/aten/src/ATen/native/cpu/BlasKernel.cpp b/aten/src/ATen/native/cpu/BlasKernel.cpp index ab3b16c395a3..5256b964ec49 100644 --- a/aten/src/ATen/native/cpu/BlasKernel.cpp +++ b/aten/src/ATen/native/cpu/BlasKernel.cpp @@ -118,7 +118,7 @@ gemm_notrans_( scale_(m, n, beta, c, ldc); // c += alpha * (a @ b) - const uint64_t unsigned_m = static_cast(m); + const uint64_t unsigned_m = m; const uint64_t i_m = unsigned_m / 4; for (const uint64_t l : c10::irange(k)) { for (const uint64_t j : c10::irange(n)) { diff --git a/aten/src/ATen/native/cpu/ReduceUtils.h b/aten/src/ATen/native/cpu/ReduceUtils.h index fd7c4a2750a6..1b0be8d18db7 100644 --- a/aten/src/ATen/native/cpu/ReduceUtils.h +++ b/aten/src/ATen/native/cpu/ReduceUtils.h @@ -8,7 +8,6 @@ #include #include #include -#include namespace at::native { inline namespace CPU_CAPABILITY { diff --git a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp index dac0f3bef25e..5c677f648ca6 100644 --- 
a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp +++ b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp @@ -17,7 +17,6 @@ #include #include #include -#include // [Note AVX-SSE transitions] In general we avoid calls into cmath for code // compiled with AVX/AVX2 This is because of SSE-AVX transitions and a bug in diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp index 8c94decfff02..06958fce1754 100644 --- a/aten/src/ATen/native/cpu/Unfold2d.cpp +++ b/aten/src/ATen/native/cpu/Unfold2d.cpp @@ -240,7 +240,7 @@ static void unfolded2d_copy( int64_t output_height, int64_t output_width) { at::parallel_for( - 0, (int64_t)n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) { + 0, n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) { for (const auto k : c10::irange(start, end)) { int64_t nip = k / (kH * kW); int64_t rest = k % (kH * kW); @@ -316,7 +316,7 @@ static void unfolded2d_copy( for (int64_t x = 0; x < output_width; x++) memcpy( dst + (size_t)y * output_width + x, - src + (size_t)iy * input_width + ix + (int64_t)x * dW, + src + (size_t)iy * input_width + ix + x * dW, sizeof(scalar_t) * (1)); } } diff --git a/aten/src/ATen/native/cpu/int4mm_kernel.cpp b/aten/src/ATen/native/cpu/int4mm_kernel.cpp index c8e0b8e86793..7e0e732d9c83 100644 --- a/aten/src/ATen/native/cpu/int4mm_kernel.cpp +++ b/aten/src/ATen/native/cpu/int4mm_kernel.cpp @@ -906,7 +906,7 @@ static void ref_dyn_quant_matmul_4bit_channelwise_kernel( // Round to nearest integer const int32_t nudged_zero_point0 = lrintf(zero_point0); - int8_t* dst_ptr = (int8_t*)lhs_qa8dx + m_idx * dst_stride; + int8_t* dst_ptr = lhs_qa8dx + m_idx * dst_stride; // LHS offset at the beginning of the row *((float*)(dst_ptr)) = recip_scale0; @@ -1048,7 +1048,7 @@ static void ref_dyn_quant_matmul_4bit_groupwise_kernel( zero_point0 = (std::min)(zero_point0, qmax); const int32_t nudged_zero_point0 = lrintf(zero_point0); - int8_t* dst_ptr = (int8_t*)lhs_qa8dx + row_idx * dst_stride; + int8_t* dst_ptr = lhs_qa8dx + row_idx * dst_stride; *((float*)(dst_ptr)) = recip_scale0; dst_ptr += sizeof(float); diff --git a/aten/src/ATen/native/quantized/cpu/QuantUtils.h b/aten/src/ATen/native/quantized/cpu/QuantUtils.h index e81b0d87916b..686bbf4f8317 100644 --- a/aten/src/ATen/native/quantized/cpu/QuantUtils.h +++ b/aten/src/ATen/native/quantized/cpu/QuantUtils.h @@ -146,12 +146,12 @@ inline TensorQuantizationParams ChooseQuantizationParams( // The arithmetic error on the zero point computed from either pair // will be roughly machine_epsilon * (sum of absolute values of terms) // so we want to use the variant that adds the smaller terms. - double zero_point_from_min = qmin - min / static_cast(scale); - double zero_point_from_max = qmax - max / static_cast(scale); + double zero_point_from_min = qmin - min / scale; + double zero_point_from_max = qmax - max / scale; double zero_point_from_min_error = - std::abs(qmin) - std::abs(min / static_cast(scale)); + std::abs(qmin) - std::abs(min / scale); double zero_point_from_max_error = - std::abs(qmax) - std::abs(max / static_cast(scale)); + std::abs(qmax) - std::abs(max / scale); double initial_zero_point = zero_point_from_min_error < zero_point_from_max_error ? 
zero_point_from_min diff --git a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp index b5b887b98bb0..89bb033a6b03 100644 --- a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp +++ b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp @@ -560,7 +560,7 @@ float hsum_sq(const int32_t* A, int len) { alignas(64) float temp[8]; _mm256_store_ps(temp, sum_ps); for (const auto k : c10::irange(8)) { - row_sum += static_cast(temp[k]); + row_sum += temp[k]; } #elif defined(CPU_CAPABILITY_AVX512) __m512 sum_ps = _mm512_setzero_ps(); @@ -574,7 +574,7 @@ float hsum_sq(const int32_t* A, int len) { alignas(64) float temp[16]; _mm512_store_ps(temp, sum_ps); for (const auto k : c10::irange(16)) { - row_sum += static_cast(temp[k]); + row_sum += temp[k]; } #endif // CPU_CAPABILITY_AVX2 or CPU_CAPABILITY_AVX512 @@ -1282,7 +1282,7 @@ template void qadd_scalar_kernel(Tensor& out, const Tensor& self, const Scalar& other) { int64_t zero_point = out.q_zero_point(); float scale = static_cast(out.q_scale()); - float inv_scale = static_cast(1.0f / scale); + float inv_scale = 1.0f / scale; int64_t self_zero_point = self.q_zero_point(); float self_scale = static_cast(self.q_scale()); @@ -2915,7 +2915,7 @@ void fake_quantize_learnable_channel_grad_kernel_cpu( // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions) *dx_output = (*dy_input) * (xqi >= quant_min && xqi <= quant_max); // Calculate gradients for scale and zero point. - float xfqi = static_cast((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input)); + float xfqi = ((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input)); if (xqi < quant_min || xqi > quant_max) { *dzero_point_output = (*dy_input) * (-1) * (*scale_input) * grad_factor; *dscale_output = ((xqi < quant_min) ? 
((*dy_input) * dscale_small) : ((*dy_input) * dscale_big)) * grad_factor; @@ -4415,7 +4415,7 @@ void _qmul_tensor_cpu_impl( uint8_t y_data = *(y_ptr + idx); int32_t x_val = static_cast(x_data) - x_zero_point; int32_t y_val = static_cast(y_data) - y_zero_point; - int32_t out_val = static_cast(x_val * y_val); + int32_t out_val = x_val * y_val; float out_val_f = (float)out_val * multiplier; if constexpr (std::is_same::value) { *(out_ptr + idx) = out_val_f; diff --git a/aten/src/ATen/ops/from_blob.h b/aten/src/ATen/ops/from_blob.h index a209380abb64..63b15ef5ca1b 100644 --- a/aten/src/ATen/ops/from_blob.h +++ b/aten/src/ATen/ops/from_blob.h @@ -90,12 +90,12 @@ class TORCH_API TensorMaker { void* data_; IntArrayRef sizes_; - OptionalIntArrayRef strides_{}; - std::optional storage_offset_{}; - std::function deleter_{}; + OptionalIntArrayRef strides_; + std::optional storage_offset_; + std::function deleter_; std::unique_ptr ctx_{nullptr, detail::noopDelete}; - std::optional device_{}; - TensorOptions opts_{}; + std::optional device_; + TensorOptions opts_; bool resizeable_{}; c10::Allocator* allocator_{}; }; diff --git a/c10/core/DeviceGuard.h b/c10/core/DeviceGuard.h index 7fa366049480..682c58a0a155 100644 --- a/c10/core/DeviceGuard.h +++ b/c10/core/DeviceGuard.h @@ -182,7 +182,7 @@ class OptionalDeviceGuard { } private: - impl::InlineOptionalDeviceGuard guard_{}; + impl::InlineOptionalDeviceGuard guard_; }; // Note [Whither the DeviceGuard boilerplate] diff --git a/c10/core/StreamGuard.h b/c10/core/StreamGuard.h index d3057823a5cd..c901a8a768f1 100644 --- a/c10/core/StreamGuard.h +++ b/c10/core/StreamGuard.h @@ -143,7 +143,7 @@ struct OptionalStreamGuard { } private: - c10::impl::InlineOptionalStreamGuard guard_{}; + c10::impl::InlineOptionalStreamGuard guard_; }; /** diff --git a/c10/core/impl/PyInterpreterHooks.h b/c10/core/impl/PyInterpreterHooks.h index 32a17ad9a8a0..8cb058105158 100644 --- a/c10/core/impl/PyInterpreterHooks.h +++ b/c10/core/impl/PyInterpreterHooks.h @@ -34,6 +34,7 @@ C10_DECLARE_REGISTRY( // Get the global PyInterpreter hooks instance C10_API const PyInterpreterHooksInterface& getPyInterpreterHooks(); +// Helper function to get the global interpreter C10_API PyInterpreter* getGlobalPyInterpreter(); } // namespace c10::impl diff --git a/c10/cuda/CUDACachingAllocator.h b/c10/cuda/CUDACachingAllocator.h index 7bd53d8a70b7..84acfd78209a 100644 --- a/c10/cuda/CUDACachingAllocator.h +++ b/c10/cuda/CUDACachingAllocator.h @@ -137,7 +137,7 @@ struct TraceEntry { size_t size_; MempoolId_t mempool_; trace_time_ time_{}; - std::string compile_context_{}; + std::string compile_context_; }; // Calls made by record_function will save annotations diff --git a/caffe2/utils/threadpool/WorkersPool.h b/caffe2/utils/threadpool/WorkersPool.h index 5de6b1213e84..274456ffc532 100644 --- a/caffe2/utils/threadpool/WorkersPool.h +++ b/caffe2/utils/threadpool/WorkersPool.h @@ -39,7 +39,7 @@ struct AllocAligned { #elif defined(_MSC_VER) p = _aligned_malloc(sizeof(T), kGEMMLOWPCacheLineSize); #else - auto res = posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T)); + auto res = posix_memalign(&p, kGEMMLOWPCacheLineSize, sizeof(T)); (void)res; #endif diff --git a/torch/csrc/api/include/torch/nn/options/activation.h b/torch/csrc/api/include/torch/nn/options/activation.h index 480e09ad4de2..00c7a99e6751 100644 --- a/torch/csrc/api/include/torch/nn/options/activation.h +++ b/torch/csrc/api/include/torch/nn/options/activation.h @@ -686,23 +686,23 @@ struct TORCH_API 
MultiheadAttentionForwardFuncOptions { TORCH_ARG(bool, training) = true; - TORCH_ARG(Tensor, key_padding_mask) = {}; + TORCH_ARG(Tensor, key_padding_mask); TORCH_ARG(bool, need_weights) = true; - TORCH_ARG(Tensor, attn_mask) = {}; + TORCH_ARG(Tensor, attn_mask); TORCH_ARG(bool, use_separate_proj_weight) = false; - TORCH_ARG(Tensor, q_proj_weight) = {}; + TORCH_ARG(Tensor, q_proj_weight); - TORCH_ARG(Tensor, k_proj_weight) = {}; + TORCH_ARG(Tensor, k_proj_weight); - TORCH_ARG(Tensor, v_proj_weight) = {}; + TORCH_ARG(Tensor, v_proj_weight); - TORCH_ARG(Tensor, static_k) = {}; + TORCH_ARG(Tensor, static_k); - TORCH_ARG(Tensor, static_v) = {}; + TORCH_ARG(Tensor, static_v); TORCH_ARG(bool, average_attn_weights) = true; }; diff --git a/torch/csrc/api/include/torch/nn/options/batchnorm.h b/torch/csrc/api/include/torch/nn/options/batchnorm.h index d77cfb4f0d15..78a287207c3a 100644 --- a/torch/csrc/api/include/torch/nn/options/batchnorm.h +++ b/torch/csrc/api/include/torch/nn/options/batchnorm.h @@ -73,9 +73,9 @@ namespace functional { /// F::BatchNormFuncOptions().weight(weight).bias(bias).momentum(0.1).eps(1e-05).training(false)); /// ``` struct TORCH_API BatchNormFuncOptions { - TORCH_ARG(Tensor, weight) = Tensor(); + TORCH_ARG(Tensor, weight); - TORCH_ARG(Tensor, bias) = Tensor(); + TORCH_ARG(Tensor, bias); TORCH_ARG(bool, training) = false; diff --git a/torch/csrc/api/include/torch/nn/options/conv.h b/torch/csrc/api/include/torch/nn/options/conv.h index f10d5e9a3106..bbaecbeb97b6 100644 --- a/torch/csrc/api/include/torch/nn/options/conv.h +++ b/torch/csrc/api/include/torch/nn/options/conv.h @@ -196,7 +196,7 @@ struct ConvFuncOptions { using padding_t = torch::nn::detail::conv_padding_t; /// optional bias of shape `(out_channels)`. Default: ``None`` - TORCH_ARG(torch::Tensor, bias) = Tensor(); + TORCH_ARG(torch::Tensor, bias); /// The stride of the convolving kernel. /// For a `D`-dim convolution, must be a single number or a list of `D` @@ -352,7 +352,7 @@ namespace functional { template struct ConvTransposeFuncOptions { /// optional bias of shape `(out_channels)`. Default: ``None`` - TORCH_ARG(torch::Tensor, bias) = Tensor(); + TORCH_ARG(torch::Tensor, bias); /// The stride of the convolving kernel. /// For a `D`-dim convolution, must be a single number or a list of `D` diff --git a/torch/csrc/api/include/torch/nn/options/embedding.h b/torch/csrc/api/include/torch/nn/options/embedding.h index be689f12b3bd..3c62b2a06852 100644 --- a/torch/csrc/api/include/torch/nn/options/embedding.h +++ b/torch/csrc/api/include/torch/nn/options/embedding.h @@ -40,7 +40,7 @@ struct TORCH_API EmbeddingOptions { TORCH_ARG(bool, sparse) = false; /// The learnable weights of the module of shape (num_embeddings, /// embedding_dim) - TORCH_ARG(torch::Tensor, _weight) = Tensor(); + TORCH_ARG(torch::Tensor, _weight); }; // ============================================================================ @@ -136,7 +136,7 @@ struct TORCH_API EmbeddingBagOptions { TORCH_ARG(bool, sparse) = false; /// The learnable weights of the module of shape (num_embeddings, /// embedding_dim) - TORCH_ARG(torch::Tensor, _weight) = Tensor(); + TORCH_ARG(torch::Tensor, _weight); /// If ``true``, `offsets` has one additional element, where the last element /// is equivalent to the size of `indices`. This matches the CSR format. TORCH_ARG(bool, include_last_offset) = false; @@ -201,7 +201,7 @@ namespace functional { struct TORCH_API EmbeddingBagFuncOptions { /// Only used when `input` is 1D. 
`offsets` determines /// the starting index position of each bag (sequence) in `input`. - TORCH_ARG(torch::Tensor, offsets) = Tensor(); + TORCH_ARG(torch::Tensor, offsets); /// If given, each embedding vector with norm larger than `max_norm` is /// renormalized to have norm `max_norm`. TORCH_ARG(std::optional, max_norm) = std::nullopt; @@ -223,7 +223,7 @@ struct TORCH_API EmbeddingBagFuncOptions { /// be taken to be 1. If specified, `per_sample_weights` must have exactly the /// same shape as input and is treated as having the same `offsets`, if those /// are not None. - TORCH_ARG(torch::Tensor, per_sample_weights) = Tensor(); + TORCH_ARG(torch::Tensor, per_sample_weights); /// If ``true``, `offsets` has one additional element, where the last element /// is equivalent to the size of `indices`. This matches the CSR format. Note: /// this option is currently only supported when ``mode="sum"``. diff --git a/torch/csrc/api/include/torch/nn/options/instancenorm.h b/torch/csrc/api/include/torch/nn/options/instancenorm.h index 2c90a060340b..c37832407edf 100644 --- a/torch/csrc/api/include/torch/nn/options/instancenorm.h +++ b/torch/csrc/api/include/torch/nn/options/instancenorm.h @@ -67,13 +67,13 @@ namespace functional { /// F::InstanceNormFuncOptions().running_mean(mean).running_var(variance).weight(weight).bias(bias).momentum(0.1).eps(1e-5)); /// ``` struct TORCH_API InstanceNormFuncOptions { - TORCH_ARG(Tensor, running_mean) = Tensor(); + TORCH_ARG(Tensor, running_mean); - TORCH_ARG(Tensor, running_var) = Tensor(); + TORCH_ARG(Tensor, running_var); - TORCH_ARG(Tensor, weight) = Tensor(); + TORCH_ARG(Tensor, weight); - TORCH_ARG(Tensor, bias) = Tensor(); + TORCH_ARG(Tensor, bias); TORCH_ARG(bool, use_input_stats) = true; diff --git a/torch/csrc/api/include/torch/nn/options/loss.h b/torch/csrc/api/include/torch/nn/options/loss.h index 88d954c5e18b..b004fae8cdb0 100644 --- a/torch/csrc/api/include/torch/nn/options/loss.h +++ b/torch/csrc/api/include/torch/nn/options/loss.h @@ -131,7 +131,7 @@ struct TORCH_API BCELossOptions { reduction_t; /// A manual rescaling weight given to the loss of each batch element. - TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); /// Specifies the reduction to apply to the output. /// ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'`` TORCH_ARG(reduction_t, reduction) = torch::kMean; @@ -207,7 +207,7 @@ struct TORCH_API MultiMarginLossOptions { /// A manual rescaling weight given to each /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is /// treated as if having all ones. - TORCH_ARG(Tensor, weight) = Tensor(); + TORCH_ARG(Tensor, weight); /// Specifies the reduction to apply to the output: /// ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be /// applied, @@ -365,7 +365,7 @@ struct TORCH_API MultiLabelSoftMarginLossOptions { /// A manual rescaling weight given to each /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is /// treated as if having all ones. - TORCH_ARG(Tensor, weight) = Tensor(); + TORCH_ARG(Tensor, weight); /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. /// 'none': no reduction will be applied, 'mean': the sum of the output will @@ -697,7 +697,7 @@ struct TORCH_API NLLLossOptions { /// A manual rescaling weight given to each /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is /// treated as if having all ones. 
- TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); /// Specifies a target value that is ignored /// and does not contribute to the input gradient. TORCH_ARG(int64_t, ignore_index) = -100; @@ -735,7 +735,7 @@ struct TORCH_API CrossEntropyLossOptions { /// A manual rescaling weight given to each class. If given, has to be a /// Tensor of size C - TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); /// Specifies a target value that is ignored /// and does not contribute to the input gradient. TORCH_ARG(int64_t, ignore_index) = -100; @@ -774,12 +774,12 @@ struct TORCH_API BCEWithLogitsLossOptions { reduction_t; /// A manual rescaling weight given to the loss of each batch element. /// If given, has to be a Tensor of size `nbatch`. - TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); /// Specifies the reduction to apply to the output. Default: Mean TORCH_ARG(reduction_t, reduction) = torch::kMean; /// A weight of positive examples. /// Must be a vector with length equal to the number of classes. - TORCH_ARG(Tensor, pos_weight) = {}; + TORCH_ARG(Tensor, pos_weight); }; namespace functional { diff --git a/torch/csrc/api/include/torch/nn/options/normalization.h b/torch/csrc/api/include/torch/nn/options/normalization.h index 6097a2923af2..439f8b2a9808 100644 --- a/torch/csrc/api/include/torch/nn/options/normalization.h +++ b/torch/csrc/api/include/torch/nn/options/normalization.h @@ -43,9 +43,9 @@ struct TORCH_API LayerNormFuncOptions { /// input shape from an expected input. TORCH_ARG(std::vector, normalized_shape); - TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); - TORCH_ARG(Tensor, bias) = {}; + TORCH_ARG(Tensor, bias); /// a value added to the denominator for numerical stability. ``Default: /// 1e-5``. @@ -177,9 +177,9 @@ struct TORCH_API GroupNormFuncOptions { /// number of groups to separate the channels into TORCH_ARG(int64_t, num_groups); - TORCH_ARG(Tensor, weight) = {}; + TORCH_ARG(Tensor, weight); - TORCH_ARG(Tensor, bias) = {}; + TORCH_ARG(Tensor, bias); /// a value added to the denominator for numerical stability. Default: 1e-5 TORCH_ARG(double, eps) = 1e-5; diff --git a/torch/csrc/api/include/torch/nn/options/pooling.h b/torch/csrc/api/include/torch/nn/options/pooling.h index 3934f326c8a5..4449a16f2206 100644 --- a/torch/csrc/api/include/torch/nn/options/pooling.h +++ b/torch/csrc/api/include/torch/nn/options/pooling.h @@ -456,7 +456,7 @@ struct FractionalMaxPoolOptions { using ExpandingArrayDouble = torch::ExpandingArray; TORCH_ARG(std::optional, output_ratio) = std::nullopt; - TORCH_ARG(torch::Tensor, _random_samples) = Tensor(); + TORCH_ARG(torch::Tensor, _random_samples); }; /// `FractionalMaxPoolOptions` specialized for the `FractionalMaxPool2d` module. 
diff --git a/torch/csrc/api/include/torch/optim/adam.h b/torch/csrc/api/include/torch/optim/adam.h index 6c06e4030cf4..c75aac357717 100644 --- a/torch/csrc/api/include/torch/optim/adam.h +++ b/torch/csrc/api/include/torch/optim/adam.h @@ -38,7 +38,7 @@ struct TORCH_API AdamParamState TORCH_ARG(int64_t, step) = 0; TORCH_ARG(torch::Tensor, exp_avg); TORCH_ARG(torch::Tensor, exp_avg_sq); - TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {}; + TORCH_ARG(torch::Tensor, max_exp_avg_sq); public: void serialize(torch::serialize::InputArchive& archive) override; diff --git a/torch/csrc/api/include/torch/optim/adamw.h b/torch/csrc/api/include/torch/optim/adamw.h index d656921a719d..8b8c4c45f406 100644 --- a/torch/csrc/api/include/torch/optim/adamw.h +++ b/torch/csrc/api/include/torch/optim/adamw.h @@ -38,7 +38,7 @@ struct TORCH_API AdamWParamState TORCH_ARG(int64_t, step) = 0; TORCH_ARG(torch::Tensor, exp_avg); TORCH_ARG(torch::Tensor, exp_avg_sq); - TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {}; + TORCH_ARG(torch::Tensor, max_exp_avg_sq); public: void serialize(torch::serialize::InputArchive& archive) override; diff --git a/torch/csrc/api/include/torch/optim/lbfgs.h b/torch/csrc/api/include/torch/optim/lbfgs.h index 3d5f1832cf60..dc90113955fd 100644 --- a/torch/csrc/api/include/torch/optim/lbfgs.h +++ b/torch/csrc/api/include/torch/optim/lbfgs.h @@ -39,9 +39,9 @@ struct TORCH_API LBFGSParamState TORCH_ARG(int64_t, n_iter) = 0; TORCH_ARG(double, t) = 0; TORCH_ARG(double, prev_loss) = 0; - TORCH_ARG(Tensor, d) = {}; - TORCH_ARG(Tensor, H_diag) = {}; - TORCH_ARG(Tensor, prev_flat_grad) = {}; + TORCH_ARG(Tensor, d); + TORCH_ARG(Tensor, H_diag); + TORCH_ARG(Tensor, prev_flat_grad); TORCH_ARG(std::deque, old_dirs); TORCH_ARG(std::deque, old_stps); TORCH_ARG(std::deque, ro); diff --git a/torch/csrc/api/include/torch/optim/rmsprop.h b/torch/csrc/api/include/torch/optim/rmsprop.h index 7b6b9dea5649..c6581b87a4b6 100644 --- a/torch/csrc/api/include/torch/optim/rmsprop.h +++ b/torch/csrc/api/include/torch/optim/rmsprop.h @@ -43,8 +43,8 @@ struct TORCH_API RMSpropParamState : public OptimizerCloneableParamState { TORCH_ARG(int64_t, step) = 0; TORCH_ARG(torch::Tensor, square_avg); - TORCH_ARG(torch::Tensor, momentum_buffer) = {}; - TORCH_ARG(torch::Tensor, grad_avg) = {}; + TORCH_ARG(torch::Tensor, momentum_buffer); + TORCH_ARG(torch::Tensor, grad_avg); public: void serialize(torch::serialize::InputArchive& archive) override; diff --git a/torch/csrc/autograd/graph_task.h b/torch/csrc/autograd/graph_task.h index 018beaffdaaf..b34d15c7d05c 100644 --- a/torch/csrc/autograd/graph_task.h +++ b/torch/csrc/autograd/graph_task.h @@ -122,7 +122,7 @@ struct GraphTask : std::enable_shared_from_this { // Note: this field is not ready to be used until the proper // `thread_locals_.set_grad_mode()` call in the constructor. 
- at::ThreadLocalState thread_locals_ = at::ThreadLocalState(); + at::ThreadLocalState thread_locals_; std::unordered_set leaf_streams; diff --git a/torch/csrc/autograd/utils/lambda_post_hook.h b/torch/csrc/autograd/utils/lambda_post_hook.h index e43d7a23876d..5f0f5626a4ad 100644 --- a/torch/csrc/autograd/utils/lambda_post_hook.h +++ b/torch/csrc/autograd/utils/lambda_post_hook.h @@ -36,7 +36,7 @@ class LambdaPostHook : public torch::autograd::FunctionPostHook { protected: std::function fn_; - compiled_fn_type compiled_fn_{}; + compiled_fn_type compiled_fn_; }; } // namespace torch::autograd::utils diff --git a/torch/csrc/distributed/c10d/FlightRecorder.hpp b/torch/csrc/distributed/c10d/FlightRecorder.hpp index b0974495a87a..a04a597b5ccb 100644 --- a/torch/csrc/distributed/c10d/FlightRecorder.hpp +++ b/torch/csrc/distributed/c10d/FlightRecorder.hpp @@ -176,9 +176,9 @@ struct FlightRecorder { size_t max_entries_ = 0; size_t next_ = 0; size_t id_ = 0; - std::map> all_pg_status_ = {}; + std::map> all_pg_status_; std::map, std::vector> - pg_name_to_ranks_ = {}; + pg_name_to_ranks_; std::string comm_lib_version_; std::optional record( diff --git a/torch/csrc/distributed/c10d/NCCLUtils.hpp b/torch/csrc/distributed/c10d/NCCLUtils.hpp index fcd55b6a655e..fdd50f69ef3d 100644 --- a/torch/csrc/distributed/c10d/NCCLUtils.hpp +++ b/torch/csrc/distributed/c10d/NCCLUtils.hpp @@ -367,7 +367,7 @@ class NCCLComm { int rank_{}; // Optional reason for communicator failure, provided by ProcessGroupNCCL for // better error messaging. - std::optional commFailureReason_{}; + std::optional commFailureReason_; bool initialized_{false}; // Whether this communicator is using nonblocking mode. Recorded during comm // creation or split. For safety, we give a default value of true (more diff --git a/torch/csrc/distributed/c10d/ParamCommsUtils.hpp b/torch/csrc/distributed/c10d/ParamCommsUtils.hpp index d011b0e42ed1..678c98e91a0b 100644 --- a/torch/csrc/distributed/c10d/ParamCommsUtils.hpp +++ b/torch/csrc/distributed/c10d/ParamCommsUtils.hpp @@ -91,7 +91,7 @@ class TORCH_API ParamCommsDebugInfo : public c10::DebugInfoBase { std::vector outputSplitSizes_; int globalRankStart_{}; int globalRankStride_{}; - std::vector groupRanks_{}; + std::vector groupRanks_; }; #define RECORD_PARAM_COMMS( \ diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp index 5a06a386d5ca..8c4a657fd7ee 100644 --- a/torch/csrc/distributed/c10d/ProcessGroup.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -11,7 +11,6 @@ #include #include -#include // ************************************************************************* // PROCESS GROUP collective communication API IS BEING CHANGED BETWEEN // versions 1.7 and 1.8. 
diff --git a/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp b/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp index 442cb490743b..07f0e26c2da9 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp @@ -284,7 +284,7 @@ class AsyncAllreduceWork : public ProcessGroupGloo::AsyncWork { reduceOp(std::move(reduceOp)), tag(tag) {} - std::vector inputs{}; + std::vector inputs; const ReduceOp reduceOp; const uint32_t tag; @@ -399,7 +399,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork { inputs(inputs), tag(tag) {} - std::vector inputs{}; + std::vector inputs; const uint32_t tag; // We share dimensionality about the sparse tensors before collecting diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp index 3386d8ee0a66..bff49938791d 100644 --- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp +++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp @@ -732,7 +732,7 @@ class TORCH_API ProcessGroupNCCL : public Backend { std::condition_variable workMetaListCV_; // Heartbeat of watchdog thread. - std::atomic_uint64_t heartbeat_{}; + std::atomic_uint64_t heartbeat_; // Whether or not to propagate detected errors to all ranks in the same PG // through TCPStore. @@ -1319,7 +1319,7 @@ class TORCH_API ProcessGroupNCCL : public Backend { int traceBufferSize_; // We gate the cudaEventCache so that we can roll it out gradually. - std::atomic cudaEventCacheEnabled_{}; + std::atomic cudaEventCacheEnabled_; std::thread onCompletionHookThread_; @@ -1327,7 +1327,7 @@ class TORCH_API ProcessGroupNCCL : public Backend { std::atomic terminateProcessGroup_; // Whether there are hooks pending to be fired - std::atomic hasPendingHooks_{}; + std::atomic hasPendingHooks_; // This is the signal from watchdog threads to indicate whether the monitor // thread should dump. Making it static so that it is accessible from all the @@ -1416,11 +1416,11 @@ class TORCH_API ProcessGroupNCCL : public Backend { // Whether or not to create start CUDAEvent and enable timing for start // and end events. Note that enableTiming_ is always true if desyncDebug_ // is set to true. - std::atomic enableTiming_{}; + std::atomic enableTiming_; // Flag to enable the print of hash value of input/output of collectives for // verification. 
- std::atomic enableCollectiveHashDebug_{}; + std::atomic enableCollectiveHashDebug_; // Whether or not TORCH_NCCL_AVOID_RECORD_STREAMS was set bool avoidRecordStreams_ = false; diff --git a/torch/csrc/distributed/c10d/TCPStore.hpp b/torch/csrc/distributed/c10d/TCPStore.hpp index 75561cf597ae..2caab088a609 100644 --- a/torch/csrc/distributed/c10d/TCPStore.hpp +++ b/torch/csrc/distributed/c10d/TCPStore.hpp @@ -41,7 +41,7 @@ class TCPServer; class TCPClient; struct SocketAddress { - std::string host{}; + std::string host; std::uint16_t port{}; }; diff --git a/torch/csrc/distributed/c10d/TCPStoreBackend.hpp b/torch/csrc/distributed/c10d/TCPStoreBackend.hpp index 3eb148f2bef8..d5f7f0248bba 100644 --- a/torch/csrc/distributed/c10d/TCPStoreBackend.hpp +++ b/torch/csrc/distributed/c10d/TCPStoreBackend.hpp @@ -65,7 +65,7 @@ class BackgroundThread { private: std::atomic is_running_{false}; - std::thread daemonThread_{}; + std::thread daemonThread_; }; std::unique_ptr create_tcpstore_backend( diff --git a/torch/csrc/distributed/c10d/Utils.hpp b/torch/csrc/distributed/c10d/Utils.hpp index c7a2e3523ae4..ff68af5b2b5d 100644 --- a/torch/csrc/distributed/c10d/Utils.hpp +++ b/torch/csrc/distributed/c10d/Utils.hpp @@ -437,7 +437,7 @@ inline at::Tensor newLikeFlat( } at::DeviceGuard gpuGuard(device); std::vector sizes{static_cast(tensors[deviceIdx].size())}; - std::vector strides{static_cast(t.numel())}; + std::vector strides{t.numel()}; sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); strides.insert(strides.end(), t.strides().begin(), t.strides().end()); return at::empty_strided( diff --git a/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp b/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp index 7d3eb5038565..6aefca8eabd3 100644 --- a/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp +++ b/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp @@ -62,7 +62,7 @@ class TORCH_API StoreCollectives : public ControlCollectives { int rank_; int worldSize_; - c10::FastSet seenKeys_{}; + c10::FastSet seenKeys_; }; } // namespace c10d diff --git a/torch/csrc/distributed/c10d/reducer.hpp b/torch/csrc/distributed/c10d/reducer.hpp index 6707975d38ac..4e5ed6a9a5c3 100644 --- a/torch/csrc/distributed/c10d/reducer.hpp +++ b/torch/csrc/distributed/c10d/reducer.hpp @@ -26,8 +26,8 @@ namespace c10d { -constexpr int kDefaultFirstBucketBytes = int(1024 * 1024); -constexpr int kDefaultBucketBytesCap = int(25 * 1024 * 1024); +constexpr int kDefaultFirstBucketBytes = 1024 * 1024; +constexpr int kDefaultBucketBytesCap = 25 * 1024 * 1024; // Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations. constexpr int kDDPRuntimeLoggingSampleRate = 100; diff --git a/torch/csrc/inductor/aoti_runtime/utils.h b/torch/csrc/inductor/aoti_runtime/utils.h index 4eee0b09a83a..49255a858d4d 100644 --- a/torch/csrc/inductor/aoti_runtime/utils.h +++ b/torch/csrc/inductor/aoti_runtime/utils.h @@ -244,7 +244,7 @@ class RAIIC10IValueHandle { class MaybeOwningAtenTensorHandle { public: - MaybeOwningAtenTensorHandle() : handle_(nullptr), raii_handle_() {} + MaybeOwningAtenTensorHandle() : handle_(nullptr) {} // We skip copy constructor as MaybeOwningAtenTensorHandle might be RAII which // makes it undefined. 
MaybeOwningAtenTensorHandle(const MaybeOwningAtenTensorHandle& other) = diff --git a/torch/csrc/jit/frontend/function_schema_parser.cpp b/torch/csrc/jit/frontend/function_schema_parser.cpp index 4c824e6997bf..24b3adfd98cd 100644 --- a/torch/csrc/jit/frontend/function_schema_parser.cpp +++ b/torch/csrc/jit/frontend/function_schema_parser.cpp @@ -111,7 +111,7 @@ struct SchemaParser { L.expect(':'); name = fmt::format("{}::{}", name, L.expect(TK_IDENT).text_view()); } - std::string overload_name = ""; + std::string overload_name; if (L.nextIf('.')) { overload_name = L.expect(TK_IDENT).text(); } diff --git a/torch/csrc/jit/frontend/lexer.h b/torch/csrc/jit/frontend/lexer.h index 0faf6ff24da4..98c235bc24f1 100644 --- a/torch/csrc/jit/frontend/lexer.h +++ b/torch/csrc/jit/frontend/lexer.h @@ -412,11 +412,7 @@ struct Token { struct Lexer { explicit Lexer(std::shared_ptr source) - : source(std::move(source)), - - indent_stack(), - next_tokens(), - shared(sharedParserData()) { + : source(std::move(source)), shared(sharedParserData()) { auto first_indent = lexRaw(true); indent_stack.push_back(first_indent.range.size()); lex(); diff --git a/torch/csrc/jit/frontend/sugared_value.cpp b/torch/csrc/jit/frontend/sugared_value.cpp index 0e9f0c9c2178..f9a80cf4da5e 100644 --- a/torch/csrc/jit/frontend/sugared_value.cpp +++ b/torch/csrc/jit/frontend/sugared_value.cpp @@ -867,7 +867,7 @@ std::shared_ptr TorchCheckValue::call( } } - if (args.size() >= 1) { + if (!args.empty()) { if (found_cond_kwarg) { throw( ErrorReport(loc) diff --git a/torch/csrc/jit/ir/ir.cpp b/torch/csrc/jit/ir/ir.cpp index 36483b928918..d7aaac509590 100644 --- a/torch/csrc/jit/ir/ir.cpp +++ b/torch/csrc/jit/ir/ir.cpp @@ -1769,7 +1769,7 @@ Node* Graph::createTupleSlice( int64_t i = beg; for ([[maybe_unused]] const auto j : c10::irange(num_values)) { - auto idx = insertConstant(IValue(static_cast(i))); + auto idx = insertConstant(IValue(i)); auto tupleIndex = insertNode(createTupleIndex(tup, idx, tt->elements()[i])); new_vals.push_back(tupleIndex->output()); diff --git a/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp b/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp index 8d847ddeb533..23752d5f041c 100644 --- a/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp +++ b/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp @@ -16,10 +16,6 @@ #include #include -namespace c10 { -TypePtr parseType(const std::string& pythonStr); -} // namespace c10 - namespace torch::jit { using caffe2::serialize::FileAdapter; @@ -67,8 +63,7 @@ std::vector get_bytecode_ivalues(PyTorchStreamReader& reader) { /********************** Bytecode **********************/ // Forward declare -uint64_t _get_model_bytecode_version( - const std::vector& bytecode_ivalues); + static uint64_t _get_model_bytecode_version_from_bytes(char* data, size_t size); uint64_t _get_model_bytecode_version(std::istream& in) { @@ -250,8 +245,6 @@ std::unordered_map _get_model_ops_and_info( /********************** Get Type Table **********************/ // Forward declare -std::unordered_set _get_mobile_model_contained_types( - const std::vector& bytecode_ivalues); std::unordered_set _get_mobile_model_contained_types( std::istream& in) { diff --git a/torch/csrc/jit/mobile/compatibility/model_compatibility.h b/torch/csrc/jit/mobile/compatibility/model_compatibility.h index 59ae2b1f23a4..03be3dbeb1c6 100644 --- a/torch/csrc/jit/mobile/compatibility/model_compatibility.h +++ b/torch/csrc/jit/mobile/compatibility/model_compatibility.h @@ -93,7 +93,7 @@ enum 
ModelCompatibilityStatus { struct ModelCompatCheckResult { ModelCompatibilityStatus status; - std::vector errors{}; + std::vector errors; }; // Takes in information about a runtime and a model and returns if the two are // compatible with one another. diff --git a/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp b/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp index c3c86a7d2698..35aeb435330e 100644 --- a/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp +++ b/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp @@ -7,10 +7,6 @@ #include #include -namespace c10 { -TypePtr parseType(const std::string& pythonStr); -} // namespace c10 - namespace torch::jit { uint64_t _get_runtime_bytecode_version() { diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.h b/torch/csrc/jit/mobile/flatbuffer_loader.h index 24c670e01f79..b34bb8809380 100644 --- a/torch/csrc/jit/mobile/flatbuffer_loader.h +++ b/torch/csrc/jit/mobile/flatbuffer_loader.h @@ -121,13 +121,6 @@ TORCH_API mobile::Module parse_flatbuffer_no_object( size_t size, std::optional device); -TORCH_API mobile::Module parse_and_initialize_mobile_module( - void* data, - size_t, - std::optional, - ExtraFilesMap* extra_files, - bool should_copy_tensor_memory); - // no op, TODO(qihan) delete TORCH_API bool register_flatbuffer_loader(); diff --git a/torch/csrc/jit/mobile/import.cpp b/torch/csrc/jit/mobile/import.cpp index 089a0df564a0..6a0ba7e038ea 100644 --- a/torch/csrc/jit/mobile/import.cpp +++ b/torch/csrc/jit/mobile/import.cpp @@ -87,8 +87,6 @@ using caffe2::serialize::MemoryReadAdapter; using caffe2::serialize::PyTorchStreamReader; using caffe2::serialize::ReadAdapterInterface; -OpCode parseOpCode(const char* str); - TypePtr resolveTypeNameMobile( const c10::QualifiedName& qn, const std::shared_ptr& compilation_unit) { @@ -216,7 +214,7 @@ class BytecodeDeserializer final { mobile::Function* function); std::shared_ptr compilation_unit_; std::unordered_set imported_libs_; - std::unique_ptr reader_{}; + std::unique_ptr reader_; std::optional device_; uint64_t module_load_options_; // From `version` or `.data/version` in model.ptl and it's compute diff --git a/torch/csrc/jit/mobile/interpreter.cpp b/torch/csrc/jit/mobile/interpreter.cpp index c2197fcdcb35..b5e67cd83cbb 100644 --- a/torch/csrc/jit/mobile/interpreter.cpp +++ b/torch/csrc/jit/mobile/interpreter.cpp @@ -17,7 +17,7 @@ #include namespace torch::jit { -std::ostream& operator<<(std::ostream& out, Instruction inst); + namespace mobile { InterpreterState::InterpreterState(const Code& code) { enterFrame(code); diff --git a/torch/csrc/jit/mobile/parse_bytecode.cpp b/torch/csrc/jit/mobile/parse_bytecode.cpp index eb95976d451b..1a1e278e371f 100644 --- a/torch/csrc/jit/mobile/parse_bytecode.cpp +++ b/torch/csrc/jit/mobile/parse_bytecode.cpp @@ -9,7 +9,7 @@ #include namespace torch::jit { -OpCode parseOpCode(const char* str); + using c10::IValue; IValue expect_field( diff --git a/torch/csrc/jit/mobile/train/optim/sgd.cpp b/torch/csrc/jit/mobile/train/optim/sgd.cpp index ae1a40e10621..1523c5629a9c 100644 --- a/torch/csrc/jit/mobile/train/optim/sgd.cpp +++ b/torch/csrc/jit/mobile/train/optim/sgd.cpp @@ -84,7 +84,7 @@ Tensor SGD::step(const LossClosure& closure) { loss = closure(); } for (auto& group : param_groups_) { - auto& options = static_cast(group.options()); + auto& options = group.options(); auto weight_decay = options.weight_decay(); auto momentum = options.momentum(); auto dampening = options.dampening(); diff --git 
a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp index 680f7683009c..af3a0d641016 100644 --- a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp +++ b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp @@ -272,8 +272,7 @@ Operation createUnaryOp( TORCH_INTERNAL_ASSERT( a_it.get_desc().get_size() % elementSize(a.scalar_type()) == 0); - auto out_aten = at::from_blob( - out_raw_data, {static_cast(nelem)}, a_options_with_strided); + auto out_aten = at::from_blob(out_raw_data, nelem, a_options_with_strided); aten_op(out_aten, in_aten); push(stack, out); }; diff --git a/torch/csrc/jit/passes/onnx/helper.h b/torch/csrc/jit/passes/onnx/helper.h index 09b31576998a..cad60e8816d3 100644 --- a/torch/csrc/jit/passes/onnx/helper.h +++ b/torch/csrc/jit/passes/onnx/helper.h @@ -28,9 +28,6 @@ TORCH_API ValueToParamPairMap buildValueToParamsMap(Block* b, const ParamMap& paramsDict); TORCH_API void eraseUnusedValuesFromMap(ValueToParamPairMap& valsToParamsMap); TORCH_API void eraseUnusedBlockInputs(Block* b); -TORCH_API void buildParamsMapFromValueToParamsMap( - const ValueToParamPairMap& valsToParamsMap, - ParamMap& paramsDict); TORCH_API Node* addNodeToBlock( Block* block, diff --git a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp index 452b18f3efc3..ccb6e0bc163a 100644 --- a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp +++ b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp @@ -1439,8 +1439,8 @@ void ComputeConstant(Node* n, int opset_version) { for (auto cur_dim : shape_vector_0) { num_elements *= cur_dim.static_size(); } - dims.emplace_back(c10::ShapeSymbol::fromStaticSize( - static_cast(num_elements))); + dims.emplace_back( + c10::ShapeSymbol::fromStaticSize(num_elements)); } } } diff --git a/torch/csrc/jit/passes/quantization/quantization_patterns.h b/torch/csrc/jit/passes/quantization/quantization_patterns.h index 86d7b5857c49..e30688ed6e21 100644 --- a/torch/csrc/jit/passes/quantization/quantization_patterns.h +++ b/torch/csrc/jit/passes/quantization/quantization_patterns.h @@ -16,7 +16,7 @@ struct QuantFusionInfo { std::string quantized_op_name; std::string pattern; std::string replacement; - std::vector filters = {}; + std::vector filters; }; namespace { diff --git a/torch/csrc/jit/passes/tensorexpr_fuser.cpp b/torch/csrc/jit/passes/tensorexpr_fuser.cpp index bb052fc8421f..672a9949c6b9 100644 --- a/torch/csrc/jit/passes/tensorexpr_fuser.cpp +++ b/torch/csrc/jit/passes/tensorexpr_fuser.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include #include diff --git a/torch/csrc/jit/python/python_arg_flatten.cpp b/torch/csrc/jit/python/python_arg_flatten.cpp index 655bbb5edac3..b71f21d043a3 100644 --- a/torch/csrc/jit/python/python_arg_flatten.cpp +++ b/torch/csrc/jit/python/python_arg_flatten.cpp @@ -78,8 +78,7 @@ void flatten_rec(PyObject* obj, ParsedArgs& args) { args.desc.metadata.emplace_back(var); args.desc.structure.push_back(D::Bool); } else if (PyLong_Check(obj)) { // Wrap longs in Long tensors - at::Tensor var = scalar_to_tensor( - at::Scalar(static_cast(THPUtils_unpackLong(obj)))); + at::Tensor var = scalar_to_tensor(at::Scalar(THPUtils_unpackLong(obj))); args.vars.push_back(var); args.desc.metadata.emplace_back(var); args.desc.structure.push_back(D::Long); diff --git a/torch/csrc/jit/runtime/instruction.h b/torch/csrc/jit/runtime/instruction.h index 73c78adbda03..fbaca4b6ea78 100644 --- a/torch/csrc/jit/runtime/instruction.h +++ 
b/torch/csrc/jit/runtime/instruction.h
@@ -95,6 +95,5 @@ std::ostream& operator<<(std::ostream& out, Instruction inst);
 bool isOpSupportedInMobile(OpCode op);
 char const* toString(OpCode op);
 OpCode parseOpCode(const char* str);
-std::ostream& operator<<(std::ostream& out, Instruction inst);
 
 } // namespace torch::jit
diff --git a/torch/csrc/jit/runtime/register_prim_ops.cpp b/torch/csrc/jit/runtime/register_prim_ops.cpp
index d59b93190e36..4aa098d870f5 100644
--- a/torch/csrc/jit/runtime/register_prim_ops.cpp
+++ b/torch/csrc/jit/runtime/register_prim_ops.cpp
@@ -1710,7 +1710,7 @@ int64_t stringFindImpl(
     bool reverse = false) {
   int64_t size = string.size();
   if (start < 0) {
-    start = std::max(int64_t(0), int64_t(size + start));
+    start = std::max(int64_t(0), size + start);
   }
   if (end < 0) {
     end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1964,7 +1964,7 @@ static const std::vector stringOpGenArgs{
         return;
       }
       if (start < 0) {
-        start = std::max(int64_t(0), int64_t(size + start));
+        start = std::max(int64_t(0), size + start);
       }
       if (end < 0) {
         end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1993,7 +1993,7 @@ static const std::vector stringOpGenArgs{
       std::string string = pop(stack).toStringRef();
       int64_t size = string.size();
       if (start < 0) {
-        start = std::max(int64_t(0), int64_t(size + start));
+        start = std::max(int64_t(0), (size + start));
       }
       if (end < 0) {
         end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -2019,7 +2019,7 @@ static const std::vector stringOpGenArgs{
       std::string string = pop(stack).toStringRef();
       int64_t size = string.size();
       if (start < 0) {
-        start = std::max(int64_t(0), int64_t(size + start));
+        start = std::max(int64_t(0), (size + start));
       }
       if (end < 0) {
         end = std::max(int64_t(0), int64_t(size + end + 1));
diff --git a/torch/csrc/jit/runtime/static/impl.cpp b/torch/csrc/jit/runtime/static/impl.cpp
index 78378b04b4a6..0a6e0b3564ad 100644
--- a/torch/csrc/jit/runtime/static/impl.cpp
+++ b/torch/csrc/jit/runtime/static/impl.cpp
@@ -1098,7 +1098,7 @@ namespace {
 
 void destroyNodeOutputs(ProcessedNode& p_node) {
   const auto borrows_outputs = borrowsOutputs(p_node.node()->kind());
-  const auto num_outputs = static_cast(p_node.num_outputs());
+  const auto num_outputs = p_node.num_outputs();
   for (const auto i : c10::irange(num_outputs)) {
     auto& output = p_node.Output(i);
     if (doesNotHeapAllocateWhenStoredInIValue(*output.type())) {
@@ -1863,7 +1863,7 @@ bool BlockRunner::check_for_memory_leak(
   const auto num_nodes = static_cast(nodes_.size());
   for (const auto n : c10::irange(num_nodes)) {
     auto& pnode = nodes_[n];
-    const auto num_outputs = static_cast(pnode.num_outputs());
+    const auto num_outputs = pnode.num_outputs();
     for (const auto i : c10::irange(num_outputs)) {
       const IValue* ival = &pnode.Output(i);
       const Value* val = pnode.node()->output(i);
@@ -1943,7 +1943,7 @@ bool BlockRunner::checkOutputTensorMemoryLeaks() {
   const auto num_nodes = static_cast(nodes_.size());
   for (const auto n : c10::irange(num_nodes)) {
     auto& pnode = nodes_[n];
-    const auto num_outputs = static_cast(pnode.num_outputs());
+    const auto num_outputs = pnode.num_outputs();
     for (const auto i : c10::irange(num_outputs)) {
       const IValue* ival = &pnode.Output(i);
       const Value* val = pnode.node()->output(i);
@@ -2042,7 +2042,7 @@ ProcessedFunction::ProcessedFunction(
       stack.emplace_back(static_cast(size));
     }
     node_op(stack);
-    const auto num_outputs = static_cast(pnode->num_outputs());
+    const auto num_outputs = pnode->num_outputs();
     TORCH_DCHECK_EQ(stack.size(), num_outputs);
     for (const auto i : c10::irange(num_outputs)) {
       pnode->Output(i) = std::move(stack[i]);
@@ -2158,7 +2158,7 @@ bool ProcessedNode::verify_no_memory_overlap(bool force_check) const {
 }
 
 bool ProcessedNode::verify_outputs_dont_overlap_each_other() const {
-  const auto n_outputs = static_cast(num_outputs());
+  const auto n_outputs = num_outputs();
   for (const auto i : c10::irange(n_outputs)) {
     if (!Output(i).isTensor()) {
       continue;
@@ -2196,7 +2196,7 @@ bool ProcessedNode::verify_inputs_dont_overlap_outputs(bool force_check) const {
     return true;
   }
   const auto n_inputs = static_cast(inputs_.size());
-  const auto n_outputs = static_cast(num_outputs());
+  const auto n_outputs = num_outputs();
   for (const auto i : c10::irange(n_inputs)) {
     const IValue* in = &Input(i);
     if (!in->isTensor()) {
@@ -2235,7 +2235,7 @@ bool ProcessedNode::check_and_correct_overlap_with(
 
 void ProcessedNode::verify_and_correct_memory_overlap() {
   const auto n_inputs = static_cast(inputs_.size());
-  const auto n_outputs = static_cast(num_outputs());
+  const auto n_outputs = num_outputs();
   for (const auto i : c10::irange(n_inputs)) {
     const IValue& in = Input(i);
     if (!in.isTensor()) {
diff --git a/torch/csrc/jit/runtime/static/impl.h b/torch/csrc/jit/runtime/static/impl.h
index b25f63c939b0..24f8f01d7547 100644
--- a/torch/csrc/jit/runtime/static/impl.h
+++ b/torch/csrc/jit/runtime/static/impl.h
@@ -142,9 +142,9 @@ class TORCH_API ManagedTensorRanges {
 
   // Maps Node* to the set of managed tensors that are now available
   // for reuse after this node.
-  c10::FastMap> node_to_newly_free_tensors_{};
+  c10::FastMap> node_to_newly_free_tensors_;
   // Maps each Value* to its lifetime (start node index, end node index)
-  c10::FastMap value_lifetimes_{};
+  c10::FastMap value_lifetimes_;
 };
 
 struct TORCH_API StaticModuleOptions {
@@ -395,7 +395,7 @@ class BlockInfo {
   c10::FastSet managed_output_tensor_values_;
   c10::FastSet leaked_values_;
 
-  ManagedTensorRanges managed_tensor_ranges_{};
+  ManagedTensorRanges managed_tensor_ranges_;
 
   // The index of this block's inputs in the shared values_ array.
   const uint16_t input_idx_;
@@ -549,7 +549,7 @@ class TORCH_API StaticModule {
   // IValue table (defined by prim::Constant nodes)
   std::vector constants_;
   // The functions to be called by corresponding ProcessedNode.
-  std::vector functions_{};
+  std::vector functions_;
   // A list of pre-processed nodes from which ProcessedNode are created per
   // StaticRuntime instance.
   std::vector nodes_;
diff --git a/torch/csrc/jit/runtime/static/memory_planner.h b/torch/csrc/jit/runtime/static/memory_planner.h
index 018b8947a07c..d9755d83048c 100644
--- a/torch/csrc/jit/runtime/static/memory_planner.h
+++ b/torch/csrc/jit/runtime/static/memory_planner.h
@@ -35,7 +35,7 @@ class StorageGroup {
   // allocated for all tensors in this storage group. Initially it
   // is zero, eventually it gets updated by the MemoryPlanner.
   size_t max_tensor_size_ = 0;
-  std::vector group_{};
+  std::vector group_;
 };
 
 // A contiguous buffer of `StorageImpl`s
@@ -263,7 +263,7 @@ class MemoryPlanner {
   // to an ordinary "strong reference" state.
   std::vector borrowed_ivalues_needing_incref_;
 
-  std::vector> managed_output_tensors_{};
+  std::vector> managed_output_tensors_;
   at::DataPtr buffer_; // allocated each time we call Run()
   uint8_t* buffer_start_{nullptr};
   uint8_t* buffer_end_{nullptr};
@@ -292,7 +292,7 @@ class StandardMemoryPlanner : public MemoryPlanner {
   void allocateManagedTensors() override;
   void deallocateManagedTensors() override;
 
-  std::vector managed_tensors_{};
+  std::vector managed_tensors_;
 };
 
 } // namespace torch::jit
diff --git a/torch/csrc/jit/serialization/export.cpp b/torch/csrc/jit/serialization/export.cpp
index 6184889e5f10..59ed5281db6b 100644
--- a/torch/csrc/jit/serialization/export.cpp
+++ b/torch/csrc/jit/serialization/export.cpp
@@ -12,7 +12,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
diff --git a/torch/csrc/jit/serialization/export_module.cpp b/torch/csrc/jit/serialization/export_module.cpp
index e0ded27d375b..36c1804a06b7 100644
--- a/torch/csrc/jit/serialization/export_module.cpp
+++ b/torch/csrc/jit/serialization/export_module.cpp
@@ -661,10 +661,10 @@ void ScriptModuleSerializer::writeByteCode(
   BackendDebugInfoRecorder debug_info_recorder;
   int64_t version_to_write = caffe2::serialize::kProducedBytecodeVersion;
 
-  elements.emplace_back(static_cast(version_to_write));
+  elements.emplace_back(version_to_write);
   std::vector debug_info_elements;
   // Always save debug handles
-  debug_info_elements.emplace_back(static_cast(version_to_write));
+  debug_info_elements.emplace_back(version_to_write);
 
   mobile::Module mobile_module =
       jitModuleToMobile(module, getOptionsFromGlobal());
@@ -913,7 +913,7 @@ void save_jit_module_to_write_func(
     const std::function& writer_func) {
   (void)save_mobile_debug_info;
   auto buffer = save_jit_module_to_bytes(module, extra_files);
-  writer_func(reinterpret_cast(buffer->data()), buffer->size());
+  writer_func(buffer->data(), buffer->size());
 }
 
 void ExportModule(
diff --git a/torch/csrc/jit/serialization/import.h b/torch/csrc/jit/serialization/import.h
index 0e2024483f4a..aa7d457d2b2c 100644
--- a/torch/csrc/jit/serialization/import.h
+++ b/torch/csrc/jit/serialization/import.h
@@ -140,12 +140,6 @@ TORCH_API Module load_jit_module_from_stream(
     ExtraFilesMap& extra_files,
     std::optional device = std::nullopt);
 
-TORCH_API Module parse_and_initialize_jit_module(
-    const std::shared_ptr& data,
-    size_t size,
-    ExtraFilesMap& extra_files,
-    std::optional device);
-
 TORCH_API c10::intrusive_ptr ObjLoaderFunc(
     const at::StrongTypePtr& type,
     IValue input);
diff --git a/torch/csrc/jit/serialization/pickler_helper.h b/torch/csrc/jit/serialization/pickler_helper.h
index b27d974a10e9..c074ab38c70a 100644
--- a/torch/csrc/jit/serialization/pickler_helper.h
+++ b/torch/csrc/jit/serialization/pickler_helper.h
@@ -53,7 +53,7 @@ enum class PickleOpCode : char {
   BINFLOAT = 'G',
 
   // Protocol 2
-  PROTO = char('\x80'),
+  PROTO = '\x80',
   NEWOBJ = '\x81',
   EXT1 = '\x82',
   EXT2 = '\x83',
@@ -71,7 +71,7 @@ enum class PickleOpCode : char {
   SHORT_BINBYTES = 'C',
 
   // Protocol 4
-  SHORT_BINUNICODE = char('\x8c'),
+  SHORT_BINUNICODE = '\x8c',
   BINUNICODE8 = '\x8d',
   BINBYTES8 = '\x8e',
   EMPTY_SET = '\x8f',
diff --git a/torch/csrc/jit/serialization/source_range_serialization.cpp b/torch/csrc/jit/serialization/source_range_serialization.cpp
index b9a56bc87523..caefafc6632e 100644
--- a/torch/csrc/jit/serialization/source_range_serialization.cpp
+++ b/torch/csrc/jit/serialization/source_range_serialization.cpp
@@ -167,9 +167,7 @@ std::vector SourceRangePickler::pickle(
     }
 
     ivalues.emplace_back(c10::ivalue::Tuple::create(
-        {(int64_t)range.bytes,
-         srs->serialize(range.range),
-         static_cast(source_range_tag)}));
+        {(int64_t)range.bytes, srs->serialize(range.range), source_range_tag}));
   }
 
   std::vector table;
diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp
index 9d23cf6d97c2..e520dd0b45c0 100644
--- a/torch/csrc/jit/serialization/unpickler.cpp
+++ b/torch/csrc/jit/serialization/unpickler.cpp
@@ -1063,10 +1063,10 @@ void Unpickler::rebuildRRef() {
   // const reference will extend the lifetime of the temporary variable
   const auto& rrefId = distributed::rpc::RRefId(
       static_cast(args.at(distributed::rpc::RREFID_ON_IDX).toInt()),
-      static_cast(args.at(distributed::rpc::RREFID_ID_IDX).toInt()));
+      args.at(distributed::rpc::RREFID_ID_IDX).toInt());
   const auto& forkId = distributed::rpc::RRefId(
       static_cast(args.at(distributed::rpc::FORKID_ON_IDX).toInt()),
-      static_cast(args.at(distributed::rpc::FORKID_ID_IDX).toInt()));
+      args.at(distributed::rpc::FORKID_ID_IDX).toInt());
   auto parent =
       static_cast(args.at(distributed::rpc::PARENT_IDX).toInt());
   const auto& typeStr = static_cast(
diff --git a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
index dbf9536ee227..35b54acaa8c3 100644
--- a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
+++ b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
@@ -1082,8 +1082,7 @@ void CudaCodeGen::call_with_numel(void** args, int64_t numel) {
   // https://stackoverflow.com/questions/34388712/cannot-understand-how-jcuda-culaunchkernel-work
   std::vector ptr_to_args(buffer_args.size());
   for (size_t i = 0; i < buffer_args.size(); i++) {
-    ptr_to_args[i] =
-        buffer_args[i].isVar() ? args[i] : const_cast(&args[i]);
+    ptr_to_args[i] = buffer_args[i].isVar() ? args[i] : (&args[i]);
   }
 
   const auto device = this->device().index();
diff --git a/torch/csrc/jit/tensorexpr/eval.h b/torch/csrc/jit/tensorexpr/eval.h
index 8cbc1689e0c9..2582ec5797dd 100644
--- a/torch/csrc/jit/tensorexpr/eval.h
+++ b/torch/csrc/jit/tensorexpr/eval.h
@@ -127,7 +127,7 @@ To raw_bitcast(const From& src) {
   TORCH_CHECK(sizeof(To) == sizeof(From), "Invalid bitcast invocation");
   To storage;
   std::memcpy(&storage, &src, sizeof(To));
-  return reinterpret_cast(storage);
+  return storage;
 }
 
 class SimpleIREvaluatorImpl;
diff --git a/torch/csrc/jit/tensorexpr/kernel.cpp b/torch/csrc/jit/tensorexpr/kernel.cpp
index a8ffa40f58db..cc1566372038 100644
--- a/torch/csrc/jit/tensorexpr/kernel.cpp
+++ b/torch/csrc/jit/tensorexpr/kernel.cpp
@@ -1482,7 +1482,7 @@ std::vector TensorExprKernel::preAllocIntermediateBufs(
       remaining_interm_bufs.push_back(buf);
       continue;
     }
-    auto bp = (void*)malloc(size);
+    auto bp = malloc(size);
     if (!bp) {
       remaining_interm_bufs.push_back(buf);
       continue;
diff --git a/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp b/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
index dd5c51d63153..46a09314fb7b 100644
--- a/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
+++ b/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
@@ -369,7 +369,7 @@ void loopnestRandomization(int64_t seed, LoopNest& l) {
 
         // Find a random number of loops to fuse
         int num_loops_to_fuse =
-            std::max(2, (int)(std::rand() % (int)loops.size()));
+            std::max(2, (std::rand() % (int)loops.size()));
 
         auto [loops_to_fuse, chosen_indices] =
             randomization_helper::select_n_randomly(
diff --git a/torch/csrc/jit/tensorexpr/operators/quantization.h b/torch/csrc/jit/tensorexpr/operators/quantization.h
index 51bdbe730a6a..a33eb1081450 100644
--- a/torch/csrc/jit/tensorexpr/operators/quantization.h
+++ b/torch/csrc/jit/tensorexpr/operators/quantization.h
@@ -42,13 +42,6 @@ TORCH_API Tensor computeQuantizedConv2dPrepack(
     const std::optional& outputType,
    at::Device device);
 
-TORCH_API Tensor computeQuantizedConv1d(
-    const std::vector& inputs,
-    const std::vector& outputShape,
-    const std::vector& outputStrides,
-    const std::optional& outputType,
-    at::Device device);
-
 TORCH_API Tensor computeQuantizedConv2d(
     const std::vector& inputs,
     const std::vector& outputShape,
diff --git a/torch/csrc/profiler/collection.h b/torch/csrc/profiler/collection.h
index 515d33053537..c0f25add5273 100644
--- a/torch/csrc/profiler/collection.h
+++ b/torch/csrc/profiler/collection.h
@@ -369,7 +369,7 @@ struct ExtraFields {
   uint64_t correlation_id_{0};
   libkineto::ActivityType activity_type_;
   Flow flow;
-  std::weak_ptr linked_activity_{};
+  std::weak_ptr linked_activity_;
   std::string metadata_json_;
 };
 
diff --git a/torch/csrc/profiler/unwind/fde.h b/torch/csrc/profiler/unwind/fde.h
index cb3de64486b8..083578ec391e 100644
--- a/torch/csrc/profiler/unwind/fde.h
+++ b/torch/csrc/profiler/unwind/fde.h
@@ -57,7 +57,7 @@ struct FDE {
      throw UnwindError("unsupported 'eh' augmentation string");
    }
    code_alignment_factor_ = static_cast(LC.readULEB128());
-    data_alignment_factor_ = static_cast(LC.readSLEB128());
+    data_alignment_factor_ = LC.readSLEB128();
    if (version == 1) {
      ra_register_ = LC.read();
    } else {