Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "[7/N] Fix Wextra-semi warning (#140225)"
This reverts commit ffb979032dc149b4c895526fe5b92d713ed7b1e1. Reverted https://github.com/pytorch/pytorch/pull/140225 on behalf of https://github.com/kit1980 due to breaking internal builds ([comment](https://github.com/pytorch/pytorch/pull/140225#issuecomment-2469312229))
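For context, the warning this PR series targets, `-Wextra-semi`, flags redundant semicolons, which in this codebase typically come from macro invocations whose expansion already ends in `;`. A minimal sketch of the pattern, assuming a hypothetical `DEFINE_FLAG` macro standing in for definitions like `C10_DEFINE_bool` (this is illustrative only, not the real C10 macro):

```cpp
// Illustrative sketch only: DEFINE_FLAG is a hypothetical macro whose expansion
// already ends in ';', so a trailing ';' at the call site is redundant.
#define DEFINE_FLAG(name, default_value) \
  bool FLAGS_##name = (default_value);

DEFINE_FLAG(enable_feature, true);  // expands to a doubled ';' -> -Wextra-semi may fire
DEFINE_FLAG(enable_other, false)    // no trailing ';' -> no redundant semicolon

int main() {
  return (FLAGS_enable_feature && !FLAGS_enable_other) ? 0 : 1;
}
```

As the diff below shows, the reverted PR had dropped the trailing semicolons at such call sites (and listed the macros under `StatementMacros` in `.clang-format`); this revert restores the semicolons and removes those `.clang-format` entries.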
@@ -101,16 +101,9 @@ SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard: c++17
StatementMacros:
- C10_DEFINE_bool
- C10_DEFINE_int
- C10_DEFINE_int32
- C10_DEFINE_int64
- C10_DEFINE_string
- PyObject_HEAD
- PyObject_VAR_HEAD
- PyException_HEAD
- DEFINE_BINARY

TabWidth: 8
UseTab: Never
---
@@ -1594,8 +1594,8 @@ inline std::tuple<Vectorized<float>, Vectorized<float>> convert_##name##_float(c
inline Vectorized<type> convert_float_##name(const Vectorized<float>& a, const Vectorized<float>& b) { \
return cvt_from_fp32<type>(__m512(a), __m512(b)); \
}
CONVERT_VECTORIZED_INIT(BFloat16, bfloat16)
CONVERT_VECTORIZED_INIT(Half, half)
CONVERT_VECTORIZED_INIT(BFloat16, bfloat16);
CONVERT_VECTORIZED_INIT(Half, half);

#else //defined(CPU_CAPABILITY_AVX512)

@@ -1624,8 +1624,8 @@ inline Vectorized<type> convert_float_##name(const Vectorized<float>& a, const V
} \
return Vectorized<type>::loadu(arr2); \
}
CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16)
CONVERT_NON_VECTORIZED_INIT(Half, half)
CONVERT_NON_VECTORIZED_INIT(BFloat16, bfloat16);
CONVERT_NON_VECTORIZED_INIT(Half, half);

#endif // defined(CPU_CAPABILITY_AVX512)

@@ -1663,8 +1663,8 @@ inline void load_fp32_from_##name(const type *data, Vectorized<float>& out1, Vec
data += Vectorized<float>::size(); \
load_fp32_from_##name(data, out2); \
}
LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16)
LOAD_FP32_NON_VECTORIZED_INIT(Half, fp16)
LOAD_FP32_NON_VECTORIZED_INIT(BFloat16, bf16);
LOAD_FP32_NON_VECTORIZED_INIT(Half, fp16);

#endif
}}}
@@ -61,6 +61,7 @@ bool SymInt::has_hint() const {
} \
}

// clang-format off
DEFINE_BINARY(operator+, std::plus<>(), add, SymInt)
DEFINE_BINARY(operator-, std::minus<>(), sub, SymInt)
DEFINE_BINARY(operator*, std::multiplies<>(), mul, SymInt)
@@ -74,6 +75,7 @@ DEFINE_BINARY(sym_gt, std::greater<>(), gt, SymBool)
DEFINE_BINARY(sym_ge, std::greater_equal<>(), ge, SymBool)
DEFINE_BINARY(min, std::min, sym_min, SymInt)
DEFINE_BINARY(max, std::max, sym_max, SymInt)
// clang-format on

SymInt::operator SymFloat() const {
if (auto ma = maybe_as_int()) {
@@ -18,7 +18,7 @@

namespace at {
struct Quantizer;
}
};

namespace torch { namespace autograd {

@@ -54,6 +54,6 @@ namespace VariableType {
const at::Tensor & unpack(const Tensor & t, const char * name, int pos);
at::Tensor unpack_opt(const Tensor & t, const char * name, int pos);
std::vector<at::Tensor> unpack(const at::ITensorListRef& tl, const char *name, int pos);
}
};

}} // namespace torch::autograd
@@ -21,7 +21,7 @@ std::unordered_set<rpc::worker_id_t> DistAutogradContext::getKnownWorkerIds()
const {
std::lock_guard<std::mutex> guard(lock_);
return knownWorkerIds_;
}
};

void DistAutogradContext::addKnownWorkerId(const rpc::worker_id_t workerId) {
std::lock_guard<std::mutex> guard(lock_);
@@ -103,7 +103,7 @@ class TORCH_API Reducer {
// been applied.
void set_optimizer_in_backward() {
optim_in_backward_ = true;
}
};

// Runs allreduce or installed communication hook given GradBucket instance.
c10::intrusive_ptr<c10::ivalue::Future> run_comm_hook(
@@ -16,7 +16,7 @@
C10_DEFINE_bool(
torch_jit_do_not_store_optimized_graph,
false,
"Do not store the optimized graph.")
"Do not store the optimized graph.");

namespace torch::jit {
namespace {
@@ -133,8 +133,8 @@ GraphFunction::SpecializationKey GraphFunction::currentSpecialization() const {
void preoptimizeGraph(std::shared_ptr<Graph>& graph, bool disable_autocast) {
Inline(*graph);

// Peephole Optimize cleans up many "is None" checks and creates constant
// prop opportunities
// Peephole Optimize cleans up many "is None" checks and creates constant prop
// opportunities
PeepholeOptimize(graph, true);

// AliasDb construction can be slow, so run it just on immutable types
@@ -6,7 +6,7 @@
namespace torch::jit {

struct ChunkOutput {
ChunkOutput(Value* v, size_t o) : val(v), offset(o) {}
ChunkOutput(Value* v, size_t o) : val(v), offset(o){};
Value* val;
size_t offset;
};
@@ -29,12 +29,12 @@
C10_DEFINE_bool(
torch_jit_disable_cat,
false,
"disable aten::cat in TE fusion groups")
"disable aten::cat in TE fusion groups");

C10_DEFINE_bool(
torch_jit_enable_dynamic_shape_fusion,
false,
"enable TE fusion using dynamic shapes")
"enable TE fusion using dynamic shapes");

namespace torch::jit {

@@ -82,8 +82,9 @@ static const OperatorSet& supported_non_eltwise_set() {
"aten::_convolution(Tensor input, Tensor weight, Tensor? bias, int[] stride, int[] padding, int[] dilation, bool transposed, int[] output_padding, int groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor",
"aten::matmul(Tensor self, Tensor other) -> Tensor",
};
// clang-format on
return supported_non_eltwise_set;
}
};

bool isSupported(Node* node) {
// For Block codegen we allow limited ops.
@@ -101,6 +102,7 @@ bool isSupported(Node* node) {
"aten::cat(Tensor[] tensors, int dim=0) -> Tensor",
"aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a)",
};
// clang-format on

if (get_tensorexpr_elementwise_set().contains(node) ||
node->isMemberOf(supported_non_eltwise_set()) ||
@@ -901,6 +903,7 @@ class TensorExprFuser {
static const OperatorSet pow{
"aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor",
};
// clang-format on

// Check types of input values.
for (const Value* v : node->inputs()) {
@@ -167,7 +167,7 @@ static std::optional<std::vector<Value*>> build_script_grad(
auto grad_inputs = insertGraph(*graph, *bw_graph, grad);
grad_inputs = unpackOutputs(grad_inputs);
return grad_inputs;
}
};

namespace {
class GradientHelper {
@@ -56,9 +56,10 @@
C10_DEFINE_bool(
torch_jit_execution_plan_reuse_code_graph,
false,
"Directly reuse the preprocessed graph in the CodeImpl to reduce the memory consumption. This is aggressive memory saving, and please be cautious!")
"Directly reuse the preprocessed graph in the CodeImpl to reduce the memory consumption. This is aggressive memory saving, and please be cautious!");

namespace torch::jit {

EnableProfilingGuard::EnableProfilingGuard() {
auto& executor_mode = getExecutorMode();
old_executor_mode = executor_mode;
@@ -431,8 +432,8 @@ struct DifferentiableGraphOp {

{
auto inputs = last(stack, num_inputs);
// hook up the outputs of df to the gradient functions of the inputs
// that require gradients
// hook up the outputs of df to the gradient functions of the inputs that
// require gradients
for (auto idx : grad.df_output_vjps) {
grad_fn->addOutputForIValue(inputs[idx]);
}
@@ -454,8 +455,8 @@ struct DifferentiableGraphOp {
// TODO - XXX - if any output is the same tensor multiple times, views
// have to be setup here. We need to refactor autograd until it is safe
// for tensors to be constructed without all the viewing infrastructure.
// this is currently intentionally not done here so we can get an idea
// of our perf before introducing overhead for correctness
// this is currently intentionally not done here so we can get an idea of
// our perf before introducing overhead for correctness
for (auto idx : grad.df_input_vjps) {
grad_fn->addInputIValue(outputs[idx]);
}
@@ -500,8 +501,7 @@ struct DifferentiableGraphOp {
detach(stack[i]);
}
}
// Capture (save) inputs that would be required to subsequently run
// backwards
// Capture (save) inputs that would be required to subsequently run backwards
void captureInputs(
DifferentiableGraphBackward& grad_fn,
at::ArrayRef<IValue> inputs) const {
@@ -736,10 +736,8 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
runOptimization(opt_graph);

// Phase 4. If this graph will be differentiated, we need to slice out the
// symbolically differentiable subgraphs for further
// optimizations.
// Phase 5. Apply non-differentiable optimizations to the graphs we've
// found
// symbolically differentiable subgraphs for further optimizations.
// Phase 5. Apply non-differentiable optimizations to the graphs we've found
// (or the whole graph if we know we won't need its derivative).
if (needsGradient(opt_graph)) {
auto diff_nodes = CreateAutodiffSubgraphs(
@@ -783,8 +781,8 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {

// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ArgumentSpecCreator arg_spec_creator_;
// Populated only when optimize is false (and in that case plan_cache will
// be unused). The compiled version of graph.
// Populated only when optimize is false (and in that case plan_cache will be
// unused). The compiled version of graph.
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ExecutionPlan fallback;

@@ -49,12 +49,12 @@ using torch::distributed::autograd::DistAutogradContainer;
C10_DEFINE_bool(
torch_jit_enable_rethrow_caught_exception,
false,
"enable rethrowing caught exception")
"enable rethrowing caught exception");

C10_DEFINE_bool(
torch_jit_enable_expanded_stacks,
false,
"When true we will attemps to pre-expand node stacks and cache expanded stacks.")
"When true we will attemps to pre-expand node stacks and cache expanded stacks.");

namespace torch::jit {

@@ -41,32 +41,32 @@
C10_DEFINE_bool(
torch_jit_enable_new_executor,
true,
"If this flag is set to false TorchScript will be using the legacy/original executor")
"If this flag is set to false TorchScript will be using the legacy/original executor");

C10_DEFINE_bool(
torch_jit_disable_warning_prints,
false,
"Disables warning.warn prints in TorchScript graph")
"Disables warning.warn prints in TorchScript graph");

C10_DEFINE_bool(
torch_jit_static_then_dynamic,
false,
"fuse on two static compilations then 10 dynamic")
"fuse on two static compilations then 10 dynamic");

C10_DEFINE_bool(
torch_jit_always_dynamic,
false,
"fuse on 12 dynamic compilations")
"fuse on 12 dynamic compilations");

C10_DEFINE_bool(
torch_jit_release_profiling_graph_after_optimization,
false,
"After getOptimizedPlanFor release the optimization record for reduction of memory in inference. This is aggressive memory saving, and please be cautious!")
"After getOptimizedPlanFor release the optimization record for reduction of memory in inference. This is aggressive memory saving, and please be cautious!");

C10_DEFINE_int32(
torch_jit_release_profiling_graph_delay_in_seconds,
60,
"How long to wait before releasing the profiling graph after optimizaiton is done. Only used if torch_jit_release_profiling_graph_after_optimization is set to true.")
"How long to wait before releasing the profiling graph after optimizaiton is done. Only used if torch_jit_release_profiling_graph_after_optimization is set to true.");

constexpr size_t kDefaultNumProfiledRuns = 1;
constexpr size_t kDefaultBailoutDepth = 20;
@@ -74,11 +74,11 @@ constexpr size_t kDefaultBailoutDepth = 20;
C10_DEFINE_int64(
torch_jit_num_profiled_runs,
kDefaultNumProfiledRuns,
"Number of profiling runs")
"Number of profiling runs");
C10_DEFINE_int64(
torch_jit_bailout_depth,
kDefaultBailoutDepth,
"Number of re-specializations")
"Number of re-specializations");

namespace torch::jit {

File diff suppressed because it is too large
@@ -50,7 +50,7 @@
C10_DEFINE_bool(
static_runtime_disable_debug_memory_overlap_check,
false,
"If true, disable the memory overlap check in debug mode in ProcessedNode::run()")
"If true, disable the memory overlap check in debug mode in ProcessedNode::run()");

namespace torch::jit {

@ -72,7 +72,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
// put output back
|
||||
p_node->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TupleUnpack,
|
||||
@ -91,7 +91,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = elems[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::DictConstruct,
|
||||
@ -116,7 +116,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
p_node->Output(0) = result;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -139,7 +139,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i - 1) = createBorrowedIValue(value->value());
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::__getitem__, aten_getitem, [](Node* n) -> SROperator {
|
||||
if (!sr_schema_check(
|
||||
@ -177,7 +177,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::__getitem__, aten_getitem, [](Node* n) ->
|
||||
|
||||
// TODO(T98581096): make __getitem__ work for other container types
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::ListConstruct,
|
||||
@ -197,7 +197,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
// put output back
|
||||
p_node->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::ListUnpack,
|
||||
@ -219,7 +219,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = list[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::append,
|
||||
@ -233,7 +233,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
auto list = p_node->Input(0).toList();
|
||||
list.push_back(p_node->Input(1));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::list,
|
||||
@ -260,7 +260,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::numel,
|
||||
@ -273,7 +273,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& arg = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = arg.numel();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::cpu,
|
||||
@ -286,7 +286,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& arg = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = arg.cpu();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::__range_length,
|
||||
@ -312,7 +312,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(0) = 0;
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::index_put, aten_index_put, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -332,7 +332,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::index_put, aten_index_put, [](Node* n) ->
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::item,
|
||||
@ -345,7 +345,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& self = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = at::native::item(self);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::GetAttr,
|
||||
@ -362,7 +362,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto slot = type.getAttributeSlot(field);
|
||||
p_node->Output(0) = module.getSlot(slot);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::SetAttr,
|
||||
@ -379,7 +379,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto slot = type.getAttributeSlot(field);
|
||||
module.setSlot(slot, p_node->Input(1));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::transpose,
|
||||
@ -396,7 +396,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in2_i = p_node->Input(2).toInt();
|
||||
p_node->Output(0) = at::native::transpose(in0_t, in1_i, in2_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::flatten, aten_flatten, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -410,7 +410,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::flatten, aten_flatten, [](Node* n) -> SRO
|
||||
const auto in2_i = p_node->Input(2).toInt();
|
||||
p_node->Output(0) = at::native::flatten(in0_t, in1_i, in2_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::permute,
|
||||
@ -426,7 +426,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in1_iv = p_node->Input(1).toDimVector();
|
||||
p_node->Output(0) = at::native::permute(in0_t, in1_iv);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::reshape,
|
||||
@ -442,7 +442,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto in1_iv = p_node->Input(1).toDimVector();
|
||||
p_node->Output(0) = at::native::reshape(in0_t, in1_iv);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::slice, aten_slice, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -458,7 +458,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::slice, aten_slice, [](Node* n) -> SROpera
|
||||
const auto in4_i = p_node->Input(4).toInt();
|
||||
p_node->Output(0) = at::native::slice(in0_t, in1_i, in2_i, in3_i, in4_i);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::narrow, aten_narrow, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -497,7 +497,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::narrow, aten_narrow, [](Node* n) -> SROpe
|
||||
").");
|
||||
p_node->Output(0) = at::native::slice(self, dim, start, start + length, 1);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -544,7 +544,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::to, aten_to, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::detach,
|
||||
@ -559,7 +559,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& in0_t = p_node->Input(0).toTensor();
|
||||
p_node->Output(0) = at::native::alias(in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::expand_as,
|
||||
@ -575,7 +575,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& other = p_node->Input(1).toTensor();
|
||||
p_node->Output(0) = self.expand(other.sizes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::isinstance,
|
||||
@ -600,7 +600,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
p_node->Output(0) = false;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TypeCheck,
|
||||
@ -633,7 +633,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
p_node->Output(num_inputs) = true;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -653,7 +653,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::view,
|
||||
@ -669,7 +669,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto size = p_node->Input(1).toIntList();
|
||||
p_node->Output(0) = at::native::view(input, size.vec());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::size,
|
||||
@ -696,7 +696,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::squeeze,
|
||||
@ -713,7 +713,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto dim = p_node->Input(1).toInt();
|
||||
p_node->Output(0) = at::native::squeeze(self, dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::split, aten_split, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -739,7 +739,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::split, aten_split, [](Node* n) -> SROpera
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::split_with_sizes,
|
||||
@ -759,7 +759,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(0) =
|
||||
at::native::split_with_sizes(self, split_sizes.vec(), dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
static_runtime::select_tensor,
|
||||
@ -788,7 +788,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
IValue(c10::MaybeOwnedTraits<at::TensorBase>::createBorrow(
|
||||
assignFrom.toTensor()));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::mul,
|
||||
@ -814,7 +814,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = ret;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::sub,
|
||||
@ -829,7 +829,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto b = pnode->Input(1).toInt();
|
||||
pnode->Output(0) = a - b;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::add,
|
||||
@ -855,7 +855,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::tensor_split, aten_tensor_split, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -889,7 +889,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(aten::tensor_split, aten_tensor_split, [](Node*
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::Int,
|
||||
@ -903,7 +903,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = at::native::item(input).toInt();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Create owned refs for special values]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -915,7 +915,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
return
|
||||
[](ProcessedNode* p_node) { p_node->Output(0) = p_node->Input(0); };
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
bool outputsEmpty(const Block* block) {
|
||||
@ -1020,7 +1020,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
return [](ProcessedNode*) {};
|
||||
}
|
||||
return [](ProcessedNode*) {};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -1147,7 +1147,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
smodule, args, future, *launcher);
|
||||
(*launcher)(std::move(runtime_launcher));
|
||||
};
|
||||
})
|
||||
});
|
||||
/*
|
||||
aten::wait waits on the future (present in corresponding fork)
|
||||
to be executed. Once the execution is complete, the future is marked
|
||||
@ -1181,7 +1181,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = elems[i];
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::Loop,
|
||||
@ -1225,7 +1225,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
p_node->Output(i) = std::move(args[i + 1]);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::CreateObject,
|
||||
@ -1240,7 +1240,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
c10::StrongTypePtr(class_type->compilation_unit(), class_type),
|
||||
class_type->numAttributes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::TupleIndex,
|
||||
@ -1262,7 +1262,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = elems[norm_idx];
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::RaiseException,
|
||||
@ -1275,7 +1275,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& message = pnode->Input(0).toStringRef();
|
||||
throw std::runtime_error(message);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::Uninitialized,
|
||||
@ -1287,7 +1287,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
return [](ProcessedNode* pnode) {
|
||||
pnode->Output(0) = IValue::uninitialized();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::format,
|
||||
@ -1304,7 +1304,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
TORCH_DCHECK_EQ(stack.size(), 1);
|
||||
pnode->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::device,
|
||||
@ -1317,7 +1317,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.device();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::dtype,
|
||||
@ -1330,7 +1330,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = static_cast<int64_t>(input.scalar_type());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::dim,
|
||||
@ -1343,7 +1343,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.dim();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::__not__,
|
||||
@ -1356,7 +1356,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
auto input = pnode->Input(0).toBool();
|
||||
pnode->Output(0) = !input;
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::Bool,
|
||||
@ -1382,7 +1382,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::is_cuda,
|
||||
@ -1395,7 +1395,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& input = pnode->Input(0).toTensor();
|
||||
pnode->Output(0) = input.is_cuda();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
prim::tolist,
|
||||
@ -1413,7 +1413,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
TORCH_DCHECK_EQ(stack.size(), 1);
|
||||
pnode->Output(0) = std::move(stack[0]);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// See [Borrowed IValue Outputs]
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
@ -1428,7 +1428,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
pnode->Output(0) = condition ? createBorrowedIValue(pnode->Input(1))
|
||||
: createBorrowedIValue(pnode->Input(2));
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::len,
|
||||
@ -1474,7 +1474,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::IntImplicit,
|
||||
@ -1500,7 +1500,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
}
|
||||
pnode->Output(0) = at::native::item(tensor).toInt();
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::select,
|
||||
@ -1517,7 +1517,7 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto index = pnode->Input(2).toInt();
|
||||
pnode->Output(0) = at::native::select(self, dim, index);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
aten::reshape_as,
|
||||
@ -1533,6 +1533,6 @@ REGISTER_NATIVE_OPERATOR_FUNCTOR(
|
||||
const auto& other = pnode->Input(1).toTensor();
|
||||
pnode->Output(0) = at::native::reshape(self, other.sizes());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
} // namespace torch::jit
|
||||
|
@@ -37,6 +37,8 @@
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <iterator>
#include <mutex>
#include <unordered_map>

#include <ATen/CompositeExplicitAutogradFunctions.h>

@@ -44,9 +46,10 @@ C10_DEFINE_bool(
static_runtime_enable_fast_math,
true,
"If on, static runtime may use use optimizations that cause accuracy loss "
"vs the jit interpreter")
"vs the jit interpreter");

namespace at::native {

static void repeat_out(
at::Tensor& result,
const Tensor& self,
@@ -137,9 +140,9 @@ static at::Tensor& flatten_copy_out(

// We don't want to infer_size on the entire shape, because that can give us
// an extra degree of freedom we don't want; for example, consider shape [0,
// 1, 3, 0], with start_dim=1, end_dim=2. It's clear we want result shape
// [0, 3, 0] but passing [0, -1, 0] to infer_size means the -1 can take on
// any value and satisfy the constraints.
// 1, 3, 0], with start_dim=1, end_dim=2. It's clear we want result shape [0,
// 3, 0] but passing [0, -1, 0] to infer_size means the -1 can take on any
// value and satisfy the constraints.
auto iter = self.sizes().data();
auto slice_numel = std::accumulate(
iter + start_dim,
@@ -323,8 +326,8 @@ static Tensor& c2_argmin_out(
return true;
}
// if a is not nan and b is nan, then a is not less than b
// with LessOrNan semantics otherwise, act normally. If `b`
// is NaN then a < b will always return false, so this is
// with LessOrNan semantics otherwise, act normally. If `b` is
// NaN then a < b will always return false, so this is
// equivalent to the first snippet.
return a < b;
});
@@ -375,7 +378,7 @@ static at::Tensor& dequantize_copy_out(Tensor& out, const Tensor& self) {

namespace torch::jit {

C10_DEFINE_REGISTRY(SROperatorRegistry, SROperatorFunctor)
C10_DEFINE_REGISTRY(SROperatorRegistry, SROperatorFunctor);

bool opIsRegistered(const c10::Symbol& op_name) {
const std::string name(op_name.toQualString());
@ -502,7 +505,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
listConstructSlowPath(type, size, p_node);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
static void tupleConstructSlowPath(const size_t size, ProcessedNode* p_node) {
|
||||
// prepare inputs
|
||||
@ -554,7 +557,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
tupleConstructSlowPath(size, p_node);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::abs, aten_abs, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::abs(Tensor self) -> Tensor"))) {
|
||||
@ -571,7 +574,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::abs, aten_abs, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::abs_out(in0_t, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::mul, aten_mul, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -591,7 +594,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::mul, aten_mul, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::mul_out(out_t, in0_t, in1_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::addmm, aten_addmm, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -613,7 +616,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::addmm, aten_addmm, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::addmm_out(out_t, in0_t, in1_t, in2_t, in3_s, in4_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
// Disable externally to avoid MSVC errors in open-source CI
|
||||
@ -673,9 +676,9 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
&clamp_min,
|
||||
&clamp_max,
|
||||
&nan,
|
||||
&output_size})
|
||||
&output_size});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
#endif
|
||||
|
||||
@ -720,7 +723,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::clamp, aten_clamp, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::bmm, aten_bmm, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -738,7 +741,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::bmm, aten_bmm, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::bmm_out(out_t, in0_t, in1_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::nan_to_num, aten_nan_to_num, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -759,7 +762,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::nan_to_num, aten_nan_to_num, [](Node* n) -> SROp
|
||||
fastResizeToZero(out_t);
|
||||
at::native::nan_to_num_out(in0_t, in1_d, in2_d, in3_d, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -892,7 +895,7 @@ static SROperator aten_stack(Node* n) {
|
||||
};
|
||||
}
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::stack, aten_stack, aten_stack)
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::stack, aten_stack, aten_stack);
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::VarStack,
|
||||
@ -910,7 +913,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
varStackOut(*p_node, dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::leaky_relu, aten_leaky_relu, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -928,7 +931,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::leaky_relu, aten_leaky_relu, [](Node* n) -> SROp
|
||||
auto& out_t = p_node->Output(0).toTensor();
|
||||
at::cpu::leaky_relu_out(out_t, in0_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::relu, aten_relu, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::relu(Tensor self) -> Tensor"))) {
|
||||
@ -951,7 +954,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::relu, aten_relu, [](Node* n) -> SROperator {
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::tanh, aten_tanh, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::tanh(Tensor self) -> Tensor"))) {
|
||||
@ -974,7 +977,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::tanh, aten_tanh, [](Node* n) -> SROperator {
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::TensorExprDynamicGroup,
|
||||
@ -1009,7 +1012,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::sigmoid,
|
||||
@ -1035,7 +1038,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
int64_t nn = in0_t.numel();
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::logit, aten_logit, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1070,7 +1073,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::logit, aten_logit, [](Node* n) -> SROperator {
|
||||
float c = clamp_value;
|
||||
te->call({out_t.data_ptr(), in0_t.data_ptr(), &nn, &c});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::clone, aten_clone, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1111,7 +1114,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::clone, aten_clone, [](Node* n) -> SROperator {
|
||||
out_t.unsafeGetTensorImpl(), src.sizes(), src.strides());
|
||||
at::native::copy_(out_t, src, false);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_rowwise_offsets,
|
||||
@ -1149,7 +1152,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
compressed_indices_mapping,
|
||||
include_last_offset);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_4bit_rowwise_offsets,
|
||||
@ -1187,7 +1190,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
compressed_indices_mapping,
|
||||
include_last_offset);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_prepack,
|
||||
@ -1208,7 +1211,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::native::qembeddingbag_byte_prepack_out(out_t, weight);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// The out variant takes precedence over native
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::narrow_copy, aten_narrow_copy, [](Node* n) -> SROperator {
|
||||
@ -1238,7 +1241,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::narrow_copy, aten_narrow_copy, [](Node* n) -> SR
|
||||
fastResizeToZero(output);
|
||||
at::native::narrow_copy_dense_cpu_out(self, dim, start, length, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::index, aten_index, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
"aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor"))) {
|
||||
@ -1257,7 +1260,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::index, aten_index, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::index_out(out_t, in0_t, in1_l);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::index_select,
|
||||
@ -1280,7 +1283,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out);
|
||||
at::native::index_select_out_cpu_(self, dim, index, out);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::pow, aten_pow, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1342,7 +1345,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::pow, aten_pow, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -1620,7 +1623,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return to_maybe_copy_out_functor<false, false>;
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
|
||||
// out variant takes precedence over native
|
||||
// NB: This impl doesn't work for cpu->cuda copy/cast or vice versa.
|
||||
@ -1643,7 +1646,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
const bool has_memory_format = n->inputs().size() == 5;
|
||||
return get_to_copy_functor(
|
||||
has_constant_non_tensor_dtype_and_flags, has_memory_format);
|
||||
})
|
||||
});
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -1668,7 +1671,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::native::dequantize_copy_out(out_t, self);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// Out variants for view ops are registered to a separate registry because
|
||||
// their outputs (views) can't participate in memory reuse.
|
||||
@ -1692,7 +1695,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = p_node->Output(0).toTensor();
|
||||
at::native::reshape_copy_out(out, self, proposed_shape, true);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
static_runtime::flatten_copy,
|
||||
@ -1715,7 +1718,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = p_node->Output(0).toTensor();
|
||||
at::native::flatten_copy_out(out, self, start_dim, end_dim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sum, aten_sum, [](Node* n) -> SROperator {
|
||||
if (n->inputs().size() != 2 && n->inputs().size() != 4) {
|
||||
@ -1755,7 +1758,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sum, aten_sum, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::mean, aten_mean, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1792,7 +1795,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::mean, aten_mean, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::repeat, aten_repeat, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1811,7 +1814,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::repeat, aten_repeat, [](Node* n) -> SROperator {
|
||||
at::Tensor& output = p_node->Output(0).toTensor();
|
||||
at::native::repeat_out(output, self, repeats);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::max, aten_max, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1866,7 +1869,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::max, aten_max, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sign, aten_sign, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::sign.Tensor(Tensor input) -> Tensor"))) {
|
||||
@ -1883,7 +1886,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sign, aten_sign, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::sign_out(out_t, in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::div, aten_div, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -1941,7 +1944,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::div, aten_div, [](Node* n) -> SROperator {
|
||||
at::cpu::div_out(out_t, in0_t, in1_t, rounding_mode);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::log, aten_log, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema("aten::log.Tensor(Tensor input) -> Tensor"))) {
|
||||
@ -1958,7 +1961,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::log, aten_log, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::log_out(out_t, in0_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::sub, aten_sub, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -1994,7 +1997,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::sub, aten_sub, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
// TODO: support clamp_min.Tensor(Tensor self, Tensor min) -> Tensor
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -2017,7 +2020,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::clamp_min_out(out_t, in0_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::argmin, aten_argmin, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2041,7 +2044,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::argmin, aten_argmin, [](Node* n) -> SROperator {
|
||||
}
|
||||
at::cpu::argmin_out(out_t, in0_t, dim, keepdim);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::softmax, aten_softmax, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2063,7 +2066,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::softmax, aten_softmax, [](Node* n) -> SROperator
|
||||
dtype == at::ScalarType::Float;
|
||||
at::cpu::_softmax_out(out_t, in_t, dim, half_to_float);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -2119,7 +2122,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::layer_norm, aten_layer_norm, [](Node* n) -> SROp
|
||||
at::Tensor& output = p_node->Output(0).toTensor();
|
||||
at::native::layer_norm_cpu_out(output, *X, *gamma, *beta, eps, M, N);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::norm, aten_norm, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2184,7 +2187,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::norm, aten_norm, [](Node* n) -> SROperator {
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::matmul, aten_matmul, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -2204,7 +2207,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::matmul, aten_matmul, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::matmul_out(in0_t, in1_t, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(quantized::linear, quantized_linear, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2246,7 +2249,7 @@ REGISTER_OPERATOR_FUNCTOR(quantized::linear, quantized_linear, [](Node* n) -> SR
|
||||
input, output_scale, output_zero_point, out_t);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
fb::quantized_linear,
|
||||
@ -2293,7 +2296,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
input, output_scale, output_zero_point, out_t);
|
||||
}
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
|
||||
@ -2373,7 +2376,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return nullptr;
|
||||
}
|
||||
return quantized_linear_dynamic_fp16_impl<false>(n);
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::linear_relu_dynamic_fp16,
|
||||
@ -2386,7 +2389,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
return nullptr;
|
||||
}
|
||||
return quantized_linear_dynamic_fp16_impl<true>(n);
|
||||
})
|
||||
});
|
||||
|
||||
// device & pin_memory matter only when CUDA is enabled.
|
||||
static bool hasTensorWithOptions(
|
||||
@ -2435,7 +2438,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::full, aten_full, [](Node* n) -> SROperator {
|
||||
p_node->Output(0) =
|
||||
at::native::full_out(size, fill_value, p_node->Output(0).toTensor());
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::full_like, aten_full_like, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2461,7 +2464,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::full_like, aten_full_like, [](Node* n) -> SROper
|
||||
at::native::resize_(out_t, in0_t.sizes(), std::nullopt);
|
||||
at::native::fill_out(out_t, in1_s);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::ones, aten_ones, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2484,7 +2487,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::ones, aten_ones, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::ones_out(size, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::ones_like, aten_ones_like, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2509,7 +2512,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::ones_like, aten_ones_like, [](Node* n) -> SROper
|
||||
fastResizeToZero(out_t);
|
||||
at::native::ones_out(self.sizes(), out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::zeros, aten_zeros, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2530,7 +2533,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::zeros, aten_zeros, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::compositeexplicitautograd::zeros_out(out_t, size);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::linear, aten_linear, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2552,7 +2555,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::linear, aten_linear, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(out_t);
|
||||
at::native::linear_out(out_t, in0_t, in1_t, in2_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::linalg_norm, aten_linalg_norm, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2602,7 +2605,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::linalg_norm, aten_linalg_norm, [](Node* n) -> SR
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::cat, aten_cat, [](Node* n) -> SROperator {
|
||||
if (!n->matches(
|
||||
@ -2622,7 +2625,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::cat, aten_cat, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(output);
|
||||
at::cpu::cat_outf(inputs, dim, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::cumsum, aten_cumsum, [](Node* n) -> SROperator {
|
||||
if (!n->matches(torch::schema(
|
||||
@ -2642,7 +2645,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::cumsum, aten_cumsum, [](Node* n) -> SROperator {
|
||||
fastResizeToZero(output);
|
||||
at::cpu::cumsum_out(output, input, dim, dtype);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::nonzero,
|
||||
@ -2662,7 +2665,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(output);
|
||||
at::native::nonzero_out_cpu(input, output);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
@ -2687,7 +2690,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
fastResizeToZero(out_t);
|
||||
at::cpu::cat_outf(inputs, dim, out_t);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
namespace {
|
||||
// This template and its specialization help us avoid compiler warnings
|
||||
@ -2749,7 +2752,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
int64_t nn = input.numel();
|
||||
te->call({out.data_ptr(), input.data_ptr(), &nn});
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
aten::remainder,
|
||||
@ -2787,7 +2790,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
// Unrecognized overload
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(aten::where, aten_where, [](Node* n) -> SROperator {
|
||||
if (n->matches(torch::schema(
|
||||
@ -2808,7 +2811,7 @@ REGISTER_OPERATOR_FUNCTOR(aten::where, aten_where, [](Node* n) -> SROperator {
|
||||
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
prim::NumToTensor,
|
||||
@ -2830,7 +2833,7 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
}
|
||||
LogAndDumpSchema(n);
|
||||
return nullptr;
|
||||
})
|
||||
});
|
||||
|
||||
REGISTER_OPERATOR_FUNCTOR(
|
||||
quantized::embedding_bag_byte_unpack,
|
||||
@ -2852,6 +2855,6 @@ REGISTER_OPERATOR_FUNCTOR(
|
||||
auto& out = pnode->Output(0).toTensor();
|
||||
at::native::qembeddingbag_byte_unpack_out(out, weight);
|
||||
};
|
||||
})
|
||||
});
|
||||
|
||||
} // namespace torch::jit
|
||||
|
@@ -12,9 +12,10 @@
C10_DEFINE_bool(
enable_clip_ranges_gather_fusions,
true,
"If on, static runtime or optimize_sparse_nn_model will fuse clip ranges gather ops.")
"If on, static runtime or optimize_sparse_nn_model will fuse clip ranges gather ops.");

namespace torch::jit {

bool graphHasOp(std::shared_ptr<Graph>& graph, const char* op_name) {
DepthFirstGraphNodeIterator graph_it(graph);
for (auto node = graph_it.next(); node != nullptr; node = graph_it.next()) {
@@ -714,8 +715,8 @@ static void ReplaceWithCopyImpl(
// b and c are aliases of a, sigmoid_ changes b, c, as well as a. e should
// equal to d in this case. If we replace reshape with the copy version, b
// and c are no longer aliases of a, the value of e would change as a
// result. To keep static runtime consistent with the jit interpreter,
// here we choose not to replace reshape with the copy version
// result. To keep static runtime consistent with the jit interpreter, here
// we choose not to replace reshape with the copy version
if (db.hasInputWriters(n)) {
continue;
}
@@ -1085,8 +1086,8 @@ void ForceNonEmptyOutputsHelper(Value* none_value, Block* block) {
}

if (needs_output) {
// Loop sub-blocks should always return at least one output (the new
// loop condition)
// Loop sub-blocks should always return at least one output (the new loop
// condition)
DCHECK(node->kind() == prim::If);
auto* output = node->addOutput();
output->setType(c10::NoneType::get());
@@ -1339,8 +1340,8 @@ bool isNoOpSlice(Node* node) {
return false;
}
auto end = toIValue(node->input(2));
// Could also look at list length, but most models that have this pattern
// are just doing list[0:], so it's not needed for now.
// Could also look at list length, but most models that have this pattern are
// just doing list[0:], so it's not needed for now.
return end.has_value() && end->isNone();
}
} // namespace
@@ -538,7 +538,7 @@ struct FileCheckImpl {
std::vector<std::vector<Check>> groups;
};

FileCheck::FileCheck() : fcImpl(new FileCheckImpl()) {}
FileCheck::FileCheck() : fcImpl(new FileCheckImpl()){};

std::ostream& operator<<(std::ostream& out, const FileCheckImpl& fc) {
out << "FileCheck checks:\n";
@@ -546,7 +546,7 @@ std::ostream& operator<<(std::ostream& out, const FileCheckImpl& fc) {
out << "\t" << c << "\n";
}
return out;
}
};

FileCheck::~FileCheck() {
if (!fcImpl->has_run) {
@@ -554,17 +554,17 @@ FileCheck::~FileCheck() {
std::cout << *fcImpl;
}
fcImpl.reset();
}
};

void FileCheck::run(const std::string& test_file) {
fcImpl->run(test_file);
}
};

void FileCheck::run(const Graph& graph) {
std::stringstream graph_str;
graph_str << graph;
fcImpl->run(graph_str.str());
}
};

void FileCheck::run(
const std::string& input_checks_string,
@@ -6,74 +6,74 @@ C10_DEFINE_bool(torch_lazy_ir_debug, false, "Enable lazy tensor IR debugging");
C10_DEFINE_bool(
torch_lazy_param_aliasing,
true,
"Enable parameter aliasing support")
"Enable parameter aliasing support");

C10_DEFINE_bool(
torch_lazy_handle_special_scalars,
false,
"Handle special scalars 0 and 1 differently")
"Handle special scalars 0 and 1 differently");

C10_DEFINE_bool(
torch_lazy_all_numbers_special_scalars,
false,
"Handle all numbers as special scalars")
"Handle all numbers as special scalars");

C10_DEFINE_bool(
torch_lazy_reuse_ir,
false,
"Reuse IR nodes from previous tracing when possible")
"Reuse IR nodes from previous tracing when possible");

C10_DEFINE_bool(
torch_lazy_use_thread_pool,
false,
"Use thread pool to schedule backend execution")
"Use thread pool to schedule backend execution");

C10_DEFINE_bool(
torch_lazy_enable_device_data_cache,
true,
"Enable or disable device data cache (turns cache on or off), does not change cache state")
"Enable or disable device data cache (turns cache on or off), does not change cache state");

C10_DEFINE_int(
torch_lazy_compilation_cache_size,
1024,
"Size of the compilation cache")
"Size of the compilation cache");

C10_DEFINE_int(
torch_lazy_device_data_cache_size,
128,
"Size of the DeviceData cache")
"Size of the DeviceData cache");

C10_DEFINE_int(
torch_lazy_io_thread_pool_size,
// TODO: measure which default value
// will give better performance,
// std::thread::hardware_concurrency()?
// TODO: measure which default value will give better
// performance, std::thread::hardware_concurrency()?
1,
"Size of the execution thread pool")
"Size of the execution thread pool");

C10_DEFINE_int(torch_lazy_metrics_samples, 1024, "Max metrics sample size")
C10_DEFINE_int(torch_lazy_metrics_samples, 1024, "Max metrics sample size");

C10_DEFINE_int(
torch_lazy_trim_graph_check_frequency,
5000,
"How often to check for whether a graph needs to be split")
"How often to check for whether a graph needs to be split");

C10_DEFINE_int(
torch_lazy_trim_graph_size,
100000,
"The threshold (in terms of the number of nodes) for splitting a graph")
"The threshold (in terms of the number of nodes) for splitting a graph");

C10_DEFINE_string(
torch_lazy_metrics_percentiles,
"0.01:0.05:0.1:0.2:0.5:0.8:0.9:0.95:0.99",
"Metrics percentiles to be collected, using : as the delimiter")
"Metrics percentiles to be collected, using : as the delimiter");

C10_DEFINE_int(
torch_lazy_shape_cache_size,
4096,
"Set the size for the shape cache used for shape inference")
"Set the size for the shape cache used for shape inference");

namespace torch::lazy {

std::string& getLTCForceFallback() {
static std::string config;
static bool _ignore = [&]() {
@@ -35,13 +35,13 @@ class TORCH_API DimensionNode {
public:
virtual bool isSymbolic() const {
return false;
}
};
virtual int64_t getDynamicValue() const {
TORCH_CHECK(false, "NYI");
}
};
virtual int64_t getStaticValue() const {
TORCH_CHECK(false, "NYI");
}
};
virtual ~DimensionNode() = default;
};

@@ -10,9 +10,10 @@
C10_DEFINE_bool(
ltc_enable_dynamic_shapes,
false,
"Whether dynamic shape is enabled")
"Whether dynamic shape is enabled");

namespace torch::lazy {

static const torch::lazy::Output kNullOutput = torch::lazy::Output();

size_t Output::Hasher::operator()(const Output& output) const {
@@ -30,7 +30,7 @@ SizeNode::SizeNode(Value input, size_t dim)
std::vector<Shape>{},
1,
MHash(dim)),
dim_(dim) {}
dim_(dim){};

int64_t SizeNode::getStaticValue() const {
return dynamic_cast<const TsNode*>(operand(0).node)
@@ -55,7 +55,7 @@ SizeAdd::SizeAdd(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::add")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeAdd::getStaticValue() const {
return DimCast(operand(0))->getStaticValue() +
@@ -75,7 +75,7 @@ SizeMul::SizeMul(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::mul")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeMul::getStaticValue() const {
return DimCast(operand(0))->getStaticValue() *
@@ -95,7 +95,7 @@ SizeDiv::SizeDiv(Value a, Value b)
OpKind{c10::Symbol::fromQualString("aten::div")},
{std::move(a), std::move(b)},
std::vector<Shape>{},
1) {}
1){};

int64_t SizeDiv::getStaticValue() const {
TORCH_CHECK(
@@ -268,7 +268,7 @@ at::Tensor LazyNativeFunctions::_to_copy(
std::move(node), lazy_self->GetDevice()));
return result;
}
}
};

at::Tensor LazyNativeFunctions::empty_symint(
at::SymIntArrayRef sym_size,
@@ -129,7 +129,7 @@ def gen_custom_ops_registration(
static_init_dispatch_registrations += f"""
TORCH_LIBRARY_IMPL({namespace}, {dispatch_key}, m) {{
{dispatch_registrations_body}
}}"""
}};"""
anonymous_definition = "\n".join(
list(
concatMap(

@@ -1615,7 +1615,7 @@ def get_native_function_definitions(
registration_body += f"""
TORCH_LIBRARY_IMPL({namespace}, {dispatch_key}, m) {{
{newline.join(registrations[kernel_namespace][namespace])}
}}"""
}};"""
definitions.extend(
fm.substitute_with_template(
"RegisterDispatchDefinitions.ini",

@@ -460,7 +460,7 @@ def gen_dispatcher_registrations(
"""\
TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
$dispatch_registrations_body
}"""
};"""
)
static_init_dispatch_registrations = static_template.substitute(
dispatch_key=dispatch_key,