[1/N] Use internal linkage in torch/csrc C++ files. (#150930)

Turn more functions and variables into static if they are not used outside the cpp files. Unused functions are removed.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150930
Approved by: https://github.com/Skylion007
Co-authored-by: Aaron Gokaslan <aaronGokaslan@gmail.com>
Committed by: PyTorch MergeBot
Commit: 24ca7e91e6 (parent: 48132de4af)
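For readers skimming the diff below, the change is mechanical. A minimal sketch of what internal linkage means in C++ (hypothetical file and names, not taken from this PR):

// example.cpp -- hypothetical translation unit, not part of this PR.
#include <string>

// Without 'static' this helper has external linkage: its symbol is visible to
// the linker from every translation unit and can collide with, or be silently
// picked up instead of, an unrelated definition elsewhere.
// With 'static' it has internal linkage: it is private to this .cpp file,
// which is what clang-tidy's misc-use-internal-linkage check (see the
// NOLINTNEXTLINE comments in the hunks below) asks for.
static std::string makeLabel(int id) {
  return "item-" + std::to_string(id);
}

// An unnamed namespace has the same effect and also covers types; file-scope
// variables can be marked 'static' in the same way as in this diff.
namespace {
constexpr int kDefaultId = 42;
} // namespace

int main() {
  return makeLabel(kDefaultId).empty() ? 1 : 0;
}
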
@@ -133,6 +133,10 @@
#include <callgrind.h>
#endif

#ifdef USE_ITT
#include <torch/csrc/itt.h>
#endif

namespace py = pybind11;

static PyObject* module;
@@ -1757,12 +1761,6 @@ void initModule(PyObject* module);
} // namespace torch::xpu
#endif

#ifdef USE_ITT
namespace torch::profiler {
void initIttBindings(PyObject* module);
} // namespace torch::profiler
#endif

static std::vector<PyMethodDef> methods;

// In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
@@ -858,7 +858,7 @@ std::unique_ptr<ProfilerResult> disableProfiler() {
return result;
}
namespace tracer = torch::profiler::impl::python_tracer;
std::unique_ptr<tracer::PythonMemoryTracerBase> memory_tracer;
static std::unique_ptr<tracer::PythonMemoryTracerBase> memory_tracer;
void startMemoryProfile() {
if (memory_tracer == nullptr) {
memory_tracer = tracer::PythonMemoryTracerBase::make();
@@ -7,9 +7,6 @@

namespace c10d::intra_node_comm {

// NOLINTNEXTLINE(misc-use-internal-linkage)
bool isIntraNodeCommSupported();

static std::vector<std::string> ENABLE_INTRA_NODE_COMM = {
"ENABLE_INTRA_NODE_COMM"};
// Forces detectedTopology() to return Topology::FULLY_CONNECTED, so
@@ -87,4 +87,5 @@ class IntraNodeCommWork : public c10d::Work {

TORCH_API int64_t getIntraNodeCommUsageCounter();

bool isIntraNodeCommSupported();
} // namespace c10d::intra_node_comm
@@ -3,7 +3,7 @@

namespace torch::dynamo::autograd {

std::unique_ptr<PyCompilerInterface> kActivePyCompilerInterface;
static std::unique_ptr<PyCompilerInterface> kActivePyCompilerInterface;

const std::unique_ptr<PyCompilerInterface>& getPyCompilerInterface() {
TORCH_INTERNAL_ASSERT(kActivePyCompilerInterface != nullptr);
@@ -7,8 +7,8 @@
#include <torch/csrc/dynamo/framelocals_mapping.h>
#include <torch/csrc/utils/python_compat.h>

// NOLINTNEXTLINE(misc-use-internal-linkage)
const char* cache_lookup_profiler_str = "TorchDynamo Cache Lookup";
static constexpr const char* cache_lookup_profiler_str =
"TorchDynamo Cache Lookup";

// Remember to update the type signature for DynamoCallbackFn.__call__ in
// torch/_dynamo/types.py if this function's signature changes.
@@ -9,7 +9,7 @@ static std::array<PyMethodDef, 1> _methods = {{
nullptr} // Sentinel value indicating the end of the array
}};

bool is_instancemethod(py::object obj) {
static bool is_instancemethod(py::object obj) {
return PyInstanceMethod_Check(obj.ptr());
}

@@ -1,4 +1,5 @@
#include <torch/csrc/inductor/aoti_package/model_package_loader.h>
#include <torch/csrc/inductor/aoti_package/pybind.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner.h>
#include <torch/csrc/inductor/aoti_runner/model_container_runner_cpu.h>
#ifdef USE_CUDA
@@ -45,7 +46,6 @@ class AOTIModelPackageLoaderPybind : public AOTIModelPackageLoader {
}
};

// NOLINTNEXTLINE(misc-use-internal-linkage)
void initAOTIPackageBindings(PyObject* module) {
auto rootModule = py::handle(module).cast<py::module>();
auto m = rootModule.def_submodule("_aoti");
@@ -33,7 +33,9 @@ std::unique_ptr<AOTIModelContainerRunner> create_aoti_runner_cpu(
}
} // namespace

RegisterAOTIModelRunner register_cpu_runner("cpu", &create_aoti_runner_cpu);
static RegisterAOTIModelRunner register_cpu_runner(
"cpu",
&create_aoti_runner_cpu);

} // namespace torch::inductor
#endif
@@ -5,6 +5,7 @@
#ifdef USE_XPU
#include <torch/csrc/inductor/aoti_runner/model_container_runner_xpu.h>
#endif
#include <torch/csrc/inductor/aoti_runner/pybind.h>
#include <torch/csrc/inductor/aoti_torch/tensor_converter.h>
#include <torch/csrc/inductor/aoti_torch/utils.h>

@@ -17,7 +17,7 @@ using namespace torch::aot_inductor;
#if AT_MKLDNN_ENABLED()

template <typename T>
c10::List<T> convert_to_c10_List(const T* scalars, const int64_t len) {
static c10::List<T> convert_to_c10_List(const T* scalars, const int64_t len) {
c10::List<T> scalars_list;
scalars_list.reserve(len);
for (int64_t i = 0; i < len; i++) {
@@ -17,8 +17,7 @@

namespace torch::instruction_counter {

// NOLINTNEXTLINE(misc-use-internal-linkage)
long start() {
static long start() {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");
#else
@@ -49,8 +48,7 @@ long start() {
#endif
}

// NOLINTNEXTLINE(misc-use-internal-linkage)
uint64_t end(int fd) {
static uint64_t end(int fd) {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");
#else
@@ -1,8 +1,7 @@
#include <torch/csrc/itt.h>
#include <torch/csrc/itt_wrapper.h>
#include <torch/csrc/utils/pybind.h>

namespace torch::profiler {
// NOLINTNEXTLINE(misc-use-internal-linkage)
void initIttBindings(PyObject* module) {
auto m = py::handle(module).cast<py::module>();
torch/csrc/itt.h (new file, 8 lines)
@@ -0,0 +1,8 @@
#ifndef ITT_H
#define ITT_H
#include <torch/csrc/utils/pybind.h>

namespace torch::profiler {
void initIttBindings(PyObject* module);
} // namespace torch::profiler
#endif // ITT_H
@@ -148,7 +148,7 @@ Module::Module(
// as we bring up the system since it will degrade performance
// and may introduce bugs. test_jit.py provides context managers
// that enable it for specific tests.
thread_local bool inline_everything = false;
static thread_local bool inline_everything = false;
bool& getInlineEverythingMode() {
return inline_everything;
}
@@ -10,7 +10,7 @@
namespace torch::jit {

// Get all types that are shared in the module hierarchy rooted at \p mod.
std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
static std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
// Maintain a set of all TypePtrs.
std::unordered_set<TypePtr> types;
// Maintain another set of TypePtrs that have been encountered more than once.
@@ -32,7 +32,7 @@ std::unordered_set<TypePtr> getSharedModuleTypes(Module& mod) {
// Selectively lower \p mod to a backend. \p to_backend
// is called to lower modules. \p modules_to_lower contains
// qualified names of submodules of \p mod that should be lowered.
void toBackendSelectiveImpl(
static void toBackendSelectiveImpl(
Module& mod,
const py::function& to_backend,
const std::vector<std::string>& modules_to_lower,
@@ -118,7 +118,7 @@ void toBackendSelectiveImpl(
}
}

Module codegen_func(
static Module codegen_func(
const std::string& backend_name,
const Module& orig_module,
const py::dict& method_compile_spec) {
@@ -26,7 +26,7 @@ namespace py = pybind11;
// torch.tensor([[1.0, -1.0, 2.0, -2.0]]).unsqueeze(-1).unsqueeze(-1)
//
// In the future, preprocess will accept a dedicated object
c10::IValue preprocess(
static c10::IValue preprocess(
const torch::jit::Module& mod,
const c10::Dict<c10::IValue, c10::IValue>& method_compile_spec,
const torch::jit::BackendDebugHandleGenerator& generate_debug_handles) {
@@ -40,21 +40,6 @@ constexpr int so_suffix_len = 3;
constexpr int cpp_suffix_len = 4;
#endif

intptr_t run(const std::string& cmd);

static bool programExists(const std::string& program) {
std::stringstream ss;
c10::printQuotedString(ss, program);
at::jit::TemplateEnv env;
env.s("program", ss.str());
std::string cmd = format(check_exists_string, env);
#ifdef _MSC_VER
return (run(cmd.c_str()) == 0);
#else
return (system(cmd.c_str()) == 0);
#endif
}

#ifdef _MSC_VER
static std::optional<std::wstring> exec(const std::wstring& cmd) {
std::array<wchar_t, 128> buffer;
@@ -143,7 +128,7 @@ static void activate() {
}
}

intptr_t run(const std::string& cmd) {
static intptr_t run(const std::string& cmd) {
// Getting the path of `cmd.exe`
const wchar_t* comspec = _wgetenv(L"COMSPEC");
if (!comspec) {
@@ -168,6 +153,19 @@ intptr_t run(const std::string& cmd) {
}
#endif

static bool programExists(const std::string& program) {
std::stringstream ss;
c10::printQuotedString(ss, program);
at::jit::TemplateEnv env;
env.s("program", ss.str());
std::string cmd = format(check_exists_string, env);
#ifdef _MSC_VER
return (run(cmd.c_str()) == 0);
#else
return (system(cmd.c_str()) == 0);
#endif
}

// A single compiler config is accessed through getConfig() (below)
// Controls compilation options and may be updated based on the result
// of compilation attempts.
@@ -353,5 +351,5 @@ static std::shared_ptr<FusedKernel> createFusionKernel(
has_random);
}

RegisterFusionBackend reg(DeviceType::CPU, createFusionKernel);
static RegisterFusionBackend reg(DeviceType::CPU, createFusionKernel);
} // namespace torch::jit::fuser::cpu
@@ -19,7 +19,7 @@ c10::AliasAnalysisKind aliasAnalysisIsSpecialCase() {

// Registers fused operators so that fused graphs can properly generate fallback
// code.
RegisterOperators reg_fused_operators({Operator(
static RegisterOperators reg_fused_operators({Operator(
prim::FusedConcat,
[](const Node* node) -> Operation {
int64_t dim = node->i(attr::dim);
@@ -15,11 +15,11 @@ namespace detail {
#ifdef TORCH_ENABLE_LLVM
bool cpu_fuser_enabled = true;
#else
bool cpu_fuser_enabled = false;
static bool cpu_fuser_enabled = false;
#endif

// note: this doesn't necessarily enable NNC because NVFuser might override it
bool gpu_fuser_enabled = true;
static bool gpu_fuser_enabled = true;

} // namespace detail

@@ -104,7 +104,7 @@ static Operation createLlgaKernel(const Node* node) {
};
}

RegisterOperators oneDNNFusionGroupOp({
static RegisterOperators oneDNNFusionGroupOp({
torch::jit::Operator(
prim::oneDNNFusionGroup,
createLlgaKernel,
@@ -169,7 +169,7 @@ static Operation createLlgaGuardKernel(const Node* node) {
};
}

RegisterOperators oneDNNGuardOp({
static RegisterOperators oneDNNGuardOp({
torch::jit::Operator(
prim::oneDNNFusionGuard,
createLlgaGuardKernel,
@@ -7,7 +7,7 @@

namespace torch::jit {

auto scalar_operators_source = at::jit::CodeTemplate(
static auto scalar_operators_source = at::jit::CodeTemplate(
R"SCRIPT(
def mul(a : ${Scalar}, b : Tensor) -> Tensor:
return b * a
@@ -23,7 +23,7 @@ def div(a : ${Scalar}, b : Tensor) -> Tensor:
return torch.reciprocal(b) * a
)SCRIPT");

auto scalar_operators_no_complex_source = at::jit::CodeTemplate(
static auto scalar_operators_no_complex_source = at::jit::CodeTemplate(
R"SCRIPT(
def lt(a : ${Scalar}, b : Tensor) -> Tensor:
return b > a
@@ -35,19 +35,19 @@ def ge(a : ${Scalar}, b : Tensor) -> Tensor:
return b <= a
)SCRIPT");

auto _ntuple_ops = at::jit::CodeTemplate(
static auto _ntuple_ops = at::jit::CodeTemplate(
R"SCRIPT(
def _${name}(x: BroadcastingList${Length}[${Scalar}]) -> List[${Scalar}]:
return x
)SCRIPT");

auto floordiv = at::jit::CodeTemplate(
static auto floordiv = at::jit::CodeTemplate(
R"SCRIPT(
def floordiv(self : Tensor, other : ${Rhs_Type}) -> Tensor:
return torch.floor_divide(self, other)
)SCRIPT");

auto tensor_properties =
static auto tensor_properties =
R"SCRIPT(
def ndim(a : Tensor) -> int:
return a.dim()
@@ -67,7 +67,7 @@ def shape(a : Tensor) -> List[int]:
// aten::_assert_int_or_pair op which was removed once we were able to compile
// torch.nn.functional.assert_int_or_pair
// list_with_default also needs to be here for BC
auto aten_ops =
static auto aten_ops =
R"SCRIPT(
def _assert_int_or_pair(vals: List[int], name: str, message: str):
pass
@@ -6,7 +6,7 @@ namespace torch::jit {

// Avoid storing objects with destructor in thread_local for mobile build.
#ifndef C10_MOBILE
thread_local std::vector<Call> calls;
static thread_local std::vector<Call> calls;
#endif // C10_MOBILE

ErrorReport::ErrorReport(const ErrorReport& e)
@@ -624,11 +624,6 @@ static Value* materializeConstant(
return new_constant;
}

inline bool isSupportedListElementType(const TypePtr& type) {
return type->isSubtypeOf(*TensorType::get()) ||
type->isSubtypeOf(*NumberType::get());
}

// Information for each def being emitted.
// Defs can be nested to support closures so we need a stack of this information
// Currently records information about the functions return type.
@@ -34,14 +34,14 @@ namespace torch::jit::tracer {
namespace detail {

template <typename T>
void genericAddInput(Node* n, T value) {
static void genericAddInput(Node* n, T value) {
Value* v = n->owningGraph()->insertConstant(value);
recordSourceLocation(v->node());
n->addInput(v);
}

template <typename T>
void genericAddOptionalInput(
static void genericAddOptionalInput(
Node* n,
const char* name,
const std::optional<T>& value) {
@@ -55,7 +55,7 @@ void genericAddOptionalInput(
}

template <typename T>
void badArgType(const T& v) {
static void badArgType(const T& v) {
TORCH_CHECK(
false,
"Found an unsupported argument type in the JIT tracer: ",
@@ -63,7 +63,7 @@ void badArgType(const T& v) {
". File a bug report.");
}

thread_local std::shared_ptr<TracingState> tracing_state;
static thread_local std::shared_ptr<TracingState> tracing_state;
} // namespace detail

static std::atomic<bool> tracer_state_warn_mode{true};
@@ -1055,8 +1055,8 @@ void ArgumentStash::stashValue(
////////////////////////////////////////////////////////////////////////////////
// no python present so we just do not record source information
static void defaultRecordSourceLocation(Node* n) {}
std::atomic<decltype(&defaultRecordSourceLocation)> record_source_location(
defaultRecordSourceLocation);
static std::atomic<decltype(&defaultRecordSourceLocation)>
record_source_location(defaultRecordSourceLocation);
void recordSourceLocation(Node* n) {
return record_source_location.load()(n);
}
@@ -1067,7 +1067,7 @@ void setRecordSourceLocation(void (*v)(Node*)) {
static std::vector<StackEntry> defaultPythonCallstack() {
return std::vector<StackEntry>();
}
std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(
static std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(
defaultPythonCallstack);
std::vector<StackEntry> pythonCallstack() {
return python_callstack_fn.load()();
@@ -1079,7 +1079,7 @@ void setPythonCallstack(std::vector<StackEntry> (*v)()) {
static void defaultWarn(const std::string& str) {
TORCH_WARN(str);
}
std::atomic<warn_fn_type> warn_callback{defaultWarn};
static std::atomic<warn_fn_type> warn_callback{defaultWarn};

const char* WARN_PYTHON_DATAFLOW =
" might cause the trace to be incorrect. We can't record the data flow of "
@@ -100,7 +100,9 @@ void findAllNodes(
// NB: This overload will become ambiguous with the one Caffe2 provides in its
// logging, if they ever intersect.
template <typename T>
std::ostream& operator<<(std::ostream& out, const std::vector<T>& nodes) {
static std::ostream& operator<<(
std::ostream& out,
const std::vector<T>& nodes) {
out << at::ArrayRef<T>{nodes};
return out;
}
@@ -1671,7 +1673,7 @@ size_t Node::blocksFromGraphBlock() {
return dist;
}

inline const SourceRange& fakeRange() {
static inline const SourceRange& fakeRange() {
static SourceRange range(std::make_shared<Source>(std::string("")), 0, 1);
return range;
}
@@ -2038,7 +2040,7 @@ at::ArrayRef<Value*> createTupleUnpack(Value* v) {
return g.insertNode(g.createTupleUnpack(v))->outputs();
}

void inlineCallStackOfNode(
static void inlineCallStackOfNode(
Node* n,
std::unordered_map<InlinedCallStack*, InlinedCallStackPtr>& new_cs_entries,
Function* callee,
@@ -16,7 +16,7 @@ const static BackportManager backportManager;

// Forward declare so that _backport_for_mobile() overloads can
// call this method directly.
bool _backport_for_mobile_impl(
static bool _backport_for_mobile_impl(
std::istream& oss,
PyTorchStreamWriter& writer,
const int64_t to_version);
@@ -137,7 +137,7 @@ uint64_t _get_model_bytecode_version(

/********************** Operator Version **********************/

uint64_t _get_model_operator_version(
static uint64_t _get_model_operator_version(
PyTorchStreamReader& reader); // Forward Declare

uint64_t _get_model_operator_version(std::istream& in) {
@@ -168,7 +168,7 @@ uint64_t _get_model_operator_version(PyTorchStreamReader& reader) {
/********************** Operators and Info **********************/

// Forward declare
std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
static std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
std::vector<IValue> bytecode_ivalues);

std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(
@@ -10,7 +10,6 @@

namespace torch::jit {

char const* toString(OpCode op);
namespace mobile {
Function::Function(c10::QualifiedName name) : name_(std::move(name)) {}

@@ -12,11 +12,11 @@
#include <torch/csrc/jit/mobile/function.h>
#include <torch/csrc/jit/mobile/observer.h>
#include <torch/csrc/jit/mobile/promoted_prim_ops.h>
#include <torch/csrc/jit/runtime/instruction.h>
#include <torch/csrc/jit/runtime/jit_exception.h>
#include <torch/csrc/jit/runtime/vararg_functions.h>

namespace torch::jit {
char const* toString(OpCode op);
std::ostream& operator<<(std::ostream& out, Instruction inst);
namespace mobile {
InterpreterState::InterpreterState(const Code& code) {
@@ -110,7 +110,7 @@ static bool shape_is_fast_for_reduce(
return m < 512 || ((l < 256 && r < 256) || (l > 256 && r > 256));
}

RegisterOperators mm_tree_reduction_reg({Operator(
static RegisterOperators mm_tree_reduction_reg({Operator(
"prim::MMTreeReduce(...) -> Tensor",
[](Stack& stack) {
auto num_inputs = pop(stack).toInt();
@@ -323,7 +323,7 @@ static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
return other_side_input.numel() <= 1024 * 2048;
}

RegisterOperators mm_batch_side_reg({Operator(
static RegisterOperators mm_batch_side_reg({Operator(
prim::MMBatchSide,
[](const Node* node) -> Operation {
size_t num_other_side_inputs = node->inputs().size() - 1;
@@ -57,7 +57,7 @@ static bool isDecomposableNorm(Node* normalize_op) {
return false;
}

RegisterOperators reg_ops(
static RegisterOperators reg_ops(
{Operator(
"aten::_ncf_unsqueeze(Tensor(a) self, int ndim) -> Tensor(a)",
[](Stack& stack) {
@@ -18,7 +18,7 @@

namespace torch::jit {

void removePrintOps(Block* block) {
static void removePrintOps(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {
for (auto b : it->blocks()) {
@@ -46,7 +46,7 @@ void RemovePrintOps(std::shared_ptr<Graph>& graph) {
GRAPH_DUMP("After RemovePrintOps: ", graph);
}

void checkONNXCompatibility(const c10::FunctionSchema& schema) {
static void checkONNXCompatibility(const c10::FunctionSchema& schema) {
// in ONNX, all inputs are tensors, no support for tensor list
// so at most one input tensor list is supported
bool has_tensor_list = false;
@@ -74,7 +74,7 @@ void checkONNXCompatibility(const c10::FunctionSchema& schema) {
}
}

void preprocessCaffe2Ops(Block* block) {
static void preprocessCaffe2Ops(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {
for (auto b : it->blocks()) {
@@ -246,7 +246,7 @@ py::dict BlockToONNX(
return py::dict();
}

bool ConstantFoldCondition(torch::jit::Value* output) {
static bool ConstantFoldCondition(torch::jit::Value* output) {
auto fold_condition = output->node()->kind() != c10::onnx::Constant &&
ConstantValueMap::HasValue(output->debugName());
auto reliable_value =
@@ -14,7 +14,7 @@ using namespace ::c10::onnx;
// many constant operators would have already been removed in the export before
// this step. On the other hand if cast is inserted in symbolic, subsequent node
// conversion will break if it depends on certain inputs being constant.
void CastAllConstantToFloating(Block* block) {
static void CastAllConstantToFloating(Block* block) {
auto graph = block->owningGraph();
auto it = block->nodes().begin();
while (it != block->nodes().end()) {
@@ -30,7 +30,7 @@ enum OnnxType : int {
ONNX_UINT32,
};

std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
static std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
// Only conversion of ONNX numeric types is included here.
// Unsigned ONNX types are mapped to the next higher signed
// ScalarType type.
@@ -46,7 +46,7 @@ std::unordered_map<int, at::ScalarType> onnxTypeToScalarTypeMap = {
{ONNX_UINT32, at::kLong},
};

void handleNegativeStartEndIndex(
static void handleNegativeStartEndIndex(
int64_t& start,
int64_t& end,
int64_t& axis,
@@ -63,7 +63,7 @@ void handleNegativeStartEndIndex(
}
}

std::optional<at::Tensor> runTorchSlice_opset9(
static std::optional<at::Tensor> runTorchSlice_opset9(
const Node* node,
std::vector<at::Tensor>& inputTensorValues) {
assert(inputTensorValues.size() == 1);
@@ -103,7 +103,7 @@ std::optional<at::Tensor> runTorchSlice_opset9(
return std::optional<at::Tensor>(updated_val);
}

std::optional<at::Tensor> runTorchSlice_opset10(
static std::optional<at::Tensor> runTorchSlice_opset10(
const Node* node,
std::vector<at::Tensor>& inputTensorValues) {
const int maxSliceInputCount = 5;
@@ -198,7 +198,7 @@ std::optional<at::Tensor> runTorchSlice_opset10(
}

// Refer to AT_FORALL_SCALAR_TYPES_WITH_COMPLEX_EXCEPT_COMPLEX_HALF
at::Tensor runTorchArange_opset11(
static at::Tensor runTorchArange_opset11(
const Node* node,
const std::vector<at::Tensor>& inputTensorValues) {
TORCH_INTERNAL_ASSERT(inputTensorValues.size() == 3);
@@ -542,7 +542,7 @@ std::optional<at::Tensor> runTorchBackendForOnnx(
}
}

bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
static bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
auto parentNode = val->node();
return (parentNode->kind() == prim::Param &&
valsToParamsMap.find(val) !=
@@ -553,7 +553,7 @@ bool isConstant(Value* val, const ValueToParamPairMap& valsToParamsMap) {
AttributeKind::t); // Check other types?
}

bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
static bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
for (auto input : n->inputs()) {
if (valsToParamsMap.find(input) != valsToParamsMap.end()) {
return true;
@@ -562,7 +562,7 @@ bool hasParamInput(Node* n, const ValueToParamPairMap& valsToParamsMap) {
return false;
}

std::vector<at::Tensor> getValues(
static std::vector<at::Tensor> getValues(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
size_t numInputs = node->inputs().size();
@@ -587,7 +587,7 @@ std::vector<at::Tensor> getValues(
return inputTensorValues;
}

bool areNodeInputsConstant(
static bool areNodeInputsConstant(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
return std::all_of(
@@ -596,7 +596,7 @@ bool areNodeInputsConstant(
[&valsToParamsMap](Value* v) { return isConstant(v, valsToParamsMap); });
}

std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
static std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
std::vector<Node*> parentNodes;
for (auto val : node->inputs()) {
// If the parent of 'node' is an onnx::Constant node,
@@ -619,7 +619,10 @@ std::vector<Node*> getOnnxConstParentsToRemove(Node* node) {
// This is more of a partial evaluation analysis, where operations on constant
// nodes can be lifted so we run them earlier, before the usual parameters are
// known.
void ConstantFoldONNX(Block* b, ParamMap& paramsDict, int opset_version) {
static void ConstantFoldONNX(
Block* b,
ParamMap& paramsDict,
int opset_version) {
if (opset_version < ONNX_OPSET_9) {
TORCH_WARN(
"Constant folding supported for only opsets >= 9. "
@@ -234,7 +234,7 @@ DimSymbolMap& ConstantValueMap::GetDimSymbolMap() {
}

template <typename Map>
void UpdateStrKey(
static void UpdateStrKey(
Map& map,
const std::string& old_key,
const std::string& new_key) {
@@ -10,7 +10,7 @@ namespace onnx {
using namespace ::c10::onnx;
}

void DeduplicateInitializers(
static void DeduplicateInitializers(
std::shared_ptr<Graph>& g,
ValueToParamPairMap& valsToParamsMap,
bool (*comp)(at::Tensor&, at::Tensor&)) {
@@ -62,12 +62,12 @@ void DeduplicateInitializers(
}
}

bool DeduplicateInitializersByDataPtr(at::Tensor& t1, at::Tensor& t2) {
static bool DeduplicateInitializersByDataPtr(at::Tensor& t1, at::Tensor& t2) {
return t1.sizes().equals(t2.sizes()) && t1.strides().equals(t2.strides()) &&
(t1.has_storage() && t2.has_storage() && t1.data_ptr() == t2.data_ptr());
}

bool DeduplicateInitializersByValue(at::Tensor& t1, at::Tensor& t2) {
static bool DeduplicateInitializersByValue(at::Tensor& t1, at::Tensor& t2) {
if (t1.dtype() != t2.dtype() || !t1.sizes().equals(t2.sizes()) ||
!t1.strides().equals(t2.strides())) {
return false;
@@ -12,7 +12,7 @@ namespace onnx {
using namespace ::c10::onnx;
}

std::vector<at::Tensor> getValues(
static std::vector<at::Tensor> getValues(
Node* node,
const ValueToParamPairMap& valsToParamsMap) {
size_t numInputs = node->inputs().size();
@@ -140,7 +140,7 @@ static void fuseConvBatchNorm(Block* b, ValueToParamPairMap& valsToParamsMap) {
}
}

void EvalPeepholeONNX(Block* b, ParamMap& paramsDict) {
static void EvalPeepholeONNX(Block* b, ParamMap& paramsDict) {
auto valsToParamsMap = buildValueToParamsMap(b, paramsDict);
fuseConvBatchNorm(b, valsToParamsMap);
buildParamsMapFromValueToParamsMap(valsToParamsMap, paramsDict);
@@ -347,7 +347,7 @@ void FixupONNXLoopNodeInputs(Node* node, int opset_version) {
}
} // anonymous namespace

std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {
static std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {
auto output_size = node->outputs().size();
GRAPH_DEBUG("before FixupONNXLoopBlockInputs: ", *node->owningGraph());
FixupONNXLoopBlockInputs(node);
@@ -368,7 +368,7 @@ std::vector<Value*> FixupONNXLoopNode(Node* node, int opset_version) {

// Check if node is prim::Uninitialized,
// or output of prim::Uninitialized->onnx::Identity
bool IsUninitializedNode(Node* n) {
static bool IsUninitializedNode(Node* n) {
if (n->kind() == ::c10::onnx::Identity &&
n->inputs()[0]->node()->kind() == prim::Uninitialized)
return true;
@@ -380,7 +380,7 @@ bool IsUninitializedNode(Node* n) {
// Infer shape and type of the uninitialized_output from the corresponding
// output of the other subblock. prim::Uninitialized node is proven to be
// unused. So replace this node with one of the inferred shape and type.
void InferShapeTypeForUninitializedOutput(
static void InferShapeTypeForUninitializedOutput(
Graph* graph,
Block* block,
Value* uninitialized_output,
@@ -456,7 +456,7 @@ void InferShapeTypeForUninitializedOutput(
// -> (%1, %y.1, %7)
// ...

void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
static void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
if (node->kind() != ::c10::onnx::If) {
return;
}
@@ -510,7 +510,7 @@ void ONNXFixupUninitializedOutput(Node* node, int opset_version) {
}
}

void ONNXMergeIfBlockOutputShapes(Node* node) {
static void ONNXMergeIfBlockOutputShapes(Node* node) {
TORCH_INTERNAL_ASSERT(node->kind() == ::c10::onnx::If);
Block* then_block = node->blocks().at(0);
Block* else_block = node->blocks().at(1);
@@ -663,7 +663,7 @@ void ONNXMergeIfBlockOutputShapes(Node* node) {
}
}

std::vector<Value*> FixupONNXIfNode(Node* node, int opset_version) {
static std::vector<Value*> FixupONNXIfNode(Node* node, int opset_version) {
if (node->kind() != ::c10::onnx::If) {
return node->outputs().vec();
}
@@ -1125,20 +1125,6 @@ NodeAttrNameMap ONNXFunctionExtraction(
return fe.run();
}

Node* ONNXGetPreviousScope(std::shared_ptr<Graph>& graph) {
auto* last_node = graph->nodes().back()->prev();
auto* scope_node = NodeOfMostRecentScope(last_node);
auto* attr_node = scope_attr_graph_->create(prim::TracedModuleForward);
attr_node->setScope(scope_node->scope());
TORCH_INTERNAL_ASSERT(
scope_attr_map_.find(scope_node->scope()) == scope_attr_map_.end(),
"Found duplicated scope. Scope ",
scope_node->scope()->namesFromRoot(),
" already processed.");
scope_attr_map_[scope_node->scope()] = attr_node;
return attr_node;
}

void ONNXClearScopeRecords() {
scope_attr_map_.clear();
scope_attr_graph_ = std::make_shared<Graph>();
@@ -240,7 +240,7 @@ Node* transformToONNXConcatNode(
return concat_node;
}

void ONNXLintGraph(
static void ONNXLintGraph(
const Block* b,
std::vector<NodeKind>& n_miss_source_range,
std::vector<NodeKind>& n_miss_scope) {
@@ -22,7 +22,7 @@ using namespace ::c10::onnx;
// ...
// %weight = prim::GetAttr[name="scale"](%B)
// ...
std::deque<std::string> findSubModuleAttr(
static std::deque<std::string> findSubModuleAttr(
Value* input,
std::string& name,
Module& attrModule,
@@ -48,7 +48,10 @@ std::deque<std::string> findSubModuleAttr(
return moduleNames;
}

Value* addParamAsArgument(Function* function, std::string& name, IValue& attr) {
static Value* addParamAsArgument(
Function* function,
std::string& name,
IValue& attr) {
auto schema = function->getSchema();
auto args = schema.arguments();
args.emplace_back(name, nullptr, std::nullopt, attr);
@@ -64,7 +67,7 @@ Value* addParamAsArgument(Function* function, std::string& name, IValue& attr) {
attr.type());
}

std::vector<IValue> getParamAttributes(
static std::vector<IValue> getParamAttributes(
Block* block,
std::shared_ptr<Graph>& graph,
const Module& module_,
@@ -163,7 +166,7 @@ std::vector<IValue> getParamAttributes(
return parameterIValues;
}

void insertMainModuleAsConstant(const std::shared_ptr<Graph>& graph) {
static void insertMainModuleAsConstant(const std::shared_ptr<Graph>& graph) {
auto* constNode = graph->create(prim::CreateObject);
constNode->output()->setType(graph->inputs().at(0)->type());
auto it = graph->nodes().begin();
@@ -6,7 +6,7 @@

namespace torch::jit {

void convertSubgraphToSubBlock(Block* block) {
static void convertSubgraphToSubBlock(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
Node* node = *it++;
@@ -29,12 +29,12 @@ namespace onnx {
using namespace ::c10::onnx;
}

bool isRNN(const Node* node) {
static bool isRNN(const Node* node) {
auto k = node->kind();
return k == onnx::RNN || k == onnx::LSTM || k == onnx::GRU;
}

bool isNopTranspose(const std::vector<int64_t>& perm) {
static bool isNopTranspose(const std::vector<int64_t>& perm) {
for (int64_t i = 0, perm_size = perm.size(); i < perm_size; i++) {
if (perm[i] != i) {
return false;
@@ -52,7 +52,7 @@ bool isNopTranspose(const std::vector<int64_t>& perm) {
// iteration would have folded all the transposes up to that point. Thus,
// `ret[i] = t1[t2[i]]` says "the output of t2 at position i takes the value of
// the input tensor index contained in t1 at position `t2[i]``".
std::vector<int64_t> composeTransposes(
static std::vector<int64_t> composeTransposes(
const std::vector<int64_t>& t1,
const std::vector<int64_t>& t2) {
TORCH_INTERNAL_ASSERT(t1.size() == t2.size());
@@ -65,7 +65,7 @@ std::vector<int64_t> composeTransposes(
return ret;
}

std::vector<size_t> getBroadcastPositions(Node* node) {
static std::vector<size_t> getBroadcastPositions(Node* node) {
// Most of the element-wise ops in ONNX supports numpy broadcasting.
// Only GEMM supports one-directional broadcasting, which broadcasts the bias
// to the product.
@@ -100,7 +100,7 @@ std::vector<size_t> getBroadcastPositions(Node* node) {
// Determine whether `from` can broadcast to `to`, and if so at which
// position. `from` must be a suffix of `to`, except that any
// occurrences of 1 in `from` are treated as wildcards.
std::optional<size_t> fusibleExpandTo(
static std::optional<size_t> fusibleExpandTo(
at::IntArrayRef from,
at::IntArrayRef to) {
if (from.size() > to.size()) {
@@ -122,7 +122,7 @@ std::optional<size_t> fusibleExpandTo(
// easier for non-strided backends to more efficiently do broadcasts if this
// is local information. This optimization is not useful for PyTorch as
// 'expand' is free.
void fuseBroadcast(Block* b) {
static void fuseBroadcast(Block* b) {
for (auto n : b->nodes()) {
for (auto* child_block : n->blocks()) {
fuseBroadcast(child_block);
@@ -179,7 +179,7 @@ void fuseBroadcast(Block* b) {
}
}

void fuseConsecutiveTransposes(Block* b) {
static void fuseConsecutiveTransposes(Block* b) {
for (auto n : b->nodes()) {
for (auto* child_block : n->blocks()) {
fuseConsecutiveTransposes(child_block);
@@ -201,7 +201,7 @@ void fuseConsecutiveTransposes(Block* b) {
}
}

void eliminateNopTranspose(Block* b) {
static void eliminateNopTranspose(Block* b) {
for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
auto n = *it;
for (auto* child_block : n->blocks()) {
@@ -217,7 +217,7 @@ void eliminateNopTranspose(Block* b) {
}
}

void fuseTransposeIntoGemm(Block* b) {
static void fuseTransposeIntoGemm(Block* b) {
static const std::vector<int64_t> simpleTransPerm({1, 0});

for (auto n : b->nodes()) {
@@ -257,7 +257,7 @@ void fuseTransposeIntoGemm(Block* b) {
// the removeNopPacking pass removes the packing operations
// entirely by pairing them with their inverse PadPacked. If the
// input graph does not pair the operations, export will fail.
void pushPackingPastRnn(Block* b) {
static void pushPackingPastRnn(Block* b) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -396,7 +396,7 @@ void pushPackingPastRnn(Block* b) {
// Despite the name, this actually removes the PadPacked node and leaves
// the PackPadded node. The PackPadded should become dead code which will
// be eliminated later.
void removeNopPacking(Block* graph) {
static void removeNopPacking(Block* graph) {
for (auto it = graph->nodes().begin(); it != graph->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -424,7 +424,7 @@ void removeNopPacking(Block* graph) {
}
}

void hackFixupPadPackedShapes(Block* graph) {
static void hackFixupPadPackedShapes(Block* graph) {
// FIXME: the shape of the input to the fictional PadPacked node has
// incorrect shape. For now, just copy the shape of PadPacked to the shape
// of its input.
@@ -442,7 +442,7 @@ void hackFixupPadPackedShapes(Block* graph) {
}
}

void fixDefaultRNNState(
static void fixDefaultRNNState(
Graph* graph,
Node* n,
int input_index,
@@ -535,7 +535,7 @@ void fixDefaultRNNState(
}
}

void fixDefaultRnnHiddenState(Block* b, int opset_version) {
static void fixDefaultRnnHiddenState(Block* b, int opset_version) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -554,7 +554,7 @@ void fixDefaultRnnHiddenState(Block* b, int opset_version) {
}
}

void fixDefaultLstmCellState(Block* b, int opset_version) {
static void fixDefaultLstmCellState(Block* b, int opset_version) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); ++it) {
auto* n = *it;
for (auto* child_block : n->blocks()) {
@@ -791,7 +791,7 @@ static void eraseTupleConstruct(Block* block) {
}
}

void removeMaxPoolUnusedOutput(Block* b) {
static void removeMaxPoolUnusedOutput(Block* b) {
for (auto it = b->nodes().begin(), end = b->nodes().end(); it != end; ++it) {
auto n = *it;
for (auto* child_block : n->blocks()) {
@@ -22,7 +22,7 @@

namespace torch::jit {

inline bool PyNone_Check(PyObject* o) {
static inline bool PyNone_Check(PyObject* o) {
return o == Py_None;
}

@@ -2027,7 +2027,7 @@ void UpdateReliable(Node* n) {
// Traverse the graph inputs and compute reliability (e.g., are shapes static).
// Since the inputs do not change during export, we save computation time by
// marking it as computed and subsequently skipping.
void SetGraphInputTypeReliable(const Graph* g) {
static void SetGraphInputTypeReliable(const Graph* g) {
if (!ConstantValueMap::GetAllGraphInputsReliableComputed()) {
for (auto graph_input : g->inputs()) {
if (!ConstantValueMap::HasTypeReliable(graph_input->debugName())) {
@@ -2255,7 +2255,7 @@ void ONNXSetDynamicInputShape(
}
}

bool HasSequenceTypeOutput(Node* node) {
static bool HasSequenceTypeOutput(Node* node) {
if (node->kind() == ::c10::onnx::SplitToSequence ||
node->kind() == ::c10::onnx::SequenceInsert ||
node->kind() == ::c10::onnx::SequenceEmpty ||
@@ -2266,7 +2266,7 @@ bool HasSequenceTypeOutput(Node* node) {
return false;
}

void ONNXUpdateTypeFromTensor(
static void ONNXUpdateTypeFromTensor(
Value* graph_output,
const at::Tensor& output,
bool onnx_shape_inference) {
@@ -2282,7 +2282,7 @@ void ONNXUpdateTypeFromTensor(
// into flattened graph outputs. `outputs_index` is passed in to point to the
// current index in flattened graph outputs. The updated `outputs_index` is
// returned at the end of the function.
size_t ONNXAssignOutputShape(
static size_t ONNXAssignOutputShape(
std::shared_ptr<Graph>& graph,
size_t outputs_index,
PyObject* output_obj,
@@ -98,7 +98,7 @@ double getScaleFromInput(Node* input_node) {
input_name);
}

std::vector<Node*> CreateQuantizedWeights(
static std::vector<Node*> CreateQuantizedWeights(
std::shared_ptr<Graph>& graph,
const at::Tensor& weight,
int8_t* data,
@@ -191,7 +191,7 @@ std::vector<Node*> CreateQuantizedWeights(
return {data_node, scale_node, zero_point_node, axis_node};
}

Node* CreateQuantizedBias(
static Node* CreateQuantizedBias(
std::vector<float> data,
std::shared_ptr<Graph>& graph,
const std::vector<int64_t>& shapes) {
@@ -206,7 +206,7 @@ Node* CreateQuantizedBias(
return const_node_1;
}

Node* createIntTuple(
static Node* createIntTuple(
const std::vector<int64_t>& is,
std::shared_ptr<Graph>& graph) {
Node* const_node = graph->create(Symbol::onnx("Constant"));
@@ -214,13 +214,13 @@ Node* createIntTuple(
return const_node;
}

Node* createInt(int64_t i, std::shared_ptr<Graph>& graph) {
static Node* createInt(int64_t i, std::shared_ptr<Graph>& graph) {
Node* const_node = graph->create(Symbol::onnx("Constant"));
const_node->i_(Symbol::attr("value"), i);
return const_node;
}

void ConvertQuantizedWeight(
static void ConvertQuantizedWeight(
std::shared_ptr<Graph>& graph,
Node* node,
at::Tensor& weight) {
@@ -254,7 +254,7 @@ enum class QuantizedParamsType { CONV1D, CONV, LINEAR };
// passed to the appropriate unpack function using c10::Dispatcher. We insert
// the unpacked weights and bias into the graph using
// caffe2::Int8GivenTensorFill nodes.
void unpackQuantizedWeightsHelper(
static void unpackQuantizedWeightsHelper(
std::shared_ptr<Graph>& graph,
std::map<std::string, IValue>& paramsDict,
const std::string& pattern,
@@ -547,7 +547,7 @@ static std::

// Unpack quantized tensor inputs into {value, scale, zero_point},
// Then create a prim::TupleConstruct node based on these three values.
void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
static void UnpackQuantizedTensorInputs(std::shared_ptr<Graph>& graph) {
for (size_t index = 0; index < graph->inputs().size();) {
auto g_input = graph->inputs()[index];
TensorTypePtr shape_type = g_input->type()->cast<TensorType>();
@@ -707,7 +707,7 @@ void UnpackQuantizedWeights(
// Caffe2 expects quantized ops to be in NHWC format while pytorch inputs are in
// NCHW. This pass inserts permutes to convert from NCHW to NHWC before each
// conv op and add another permute from NHWC to NCHW after the conv op.
void insertPermutesHelper(
static void insertPermutesHelper(
std::shared_ptr<Graph>& graph,
std::map<std::string, IValue>& paramsDict,
const std::string& pattern) {
@@ -18,7 +18,7 @@ using AtenFuncArgs = std::vector<FuncArg>;
using CallFuncArgs = std::vector<FuncArg>;

// Lists of allowed quantizable operators
std::vector<std::string> _static_quantizable_call_funcs = {
static std::vector<std::string> _static_quantizable_call_funcs = {
"conv2d",
"linear",
"batch_norm",
@@ -31,7 +31,7 @@ std::vector<std::string> _static_quantizable_call_funcs = {
"embedding_bag",
};

std::vector<std::string> _static_quantizable_aten_funcs = {
static std::vector<std::string> _static_quantizable_aten_funcs = {
"conv1d",
"conv2d",
"conv3d",
@@ -51,18 +51,18 @@ std::vector<std::string> _static_quantizable_aten_funcs = {
"embedding_bag",
};

std::vector<std::string> _dynamic_quantizable_call_funcs = {
static std::vector<std::string> _dynamic_quantizable_call_funcs = {
"linear",
};

std::vector<std::string> _dynamic_quantizable_aten_funcs = {
static std::vector<std::string> _dynamic_quantizable_aten_funcs = {
"linear",
};

std::vector<std::string> _static_weight_only_quant_aten_funcs = {
static std::vector<std::string> _static_weight_only_quant_aten_funcs = {
"embedding_bag",
};
std::vector<std::string> _static_weight_only_quant_call_funcs = {
static std::vector<std::string> _static_weight_only_quant_call_funcs = {
"embedding_bag",
};

@@ -73,7 +73,7 @@ std::vector<std::string> _static_weight_only_quant_call_funcs = {
// output of the `prim::CallFunction`
// Also these ops doesn't do computation on the value of Tensor, the
// operation only depends on the shape of the Tensor
std::vector<std::string> _single_input_general_shape_call_funcs = {
static std::vector<std::string> _single_input_general_shape_call_funcs = {
"_max_pool1d",
"_max_pool2d",
"_max_pool3d",
@@ -86,7 +86,7 @@ std::vector<std::string> _single_input_general_shape_call_funcs = {
// Also these ops doesn't do computation on the value of Tensor, the
// operation only depends on the shape of the Tensor
// e.g. `aten::flatten(%input_tensor, ...)`
std::vector<std::string> _single_input_general_shape_aten_funcs = {
static std::vector<std::string> _single_input_general_shape_aten_funcs = {
"max_pool1d",
"max_pool2d",
"max_pool3d",
@@ -121,7 +121,7 @@ std::vector<std::string> _single_input_general_shape_aten_funcs = {
// Also these ops do computation on the value of Tensor
// TODO: [Need verify] looks like we can quantize simple functionals that just
// call into aten functions
std::vector<std::string> _single_input_general_value_call_funcs = {
static std::vector<std::string> _single_input_general_value_call_funcs = {
"avg_pool1d",
"avg_pool2d",
"avg_pool3d",
@@ -140,7 +140,7 @@ std::vector<std::string> _single_input_general_value_call_funcs = {
// have a single input Tensor
// Also these ops do computation on the value of Tensor
// e.g. `aten::avg_pool2d(%input_tensor, ...)`
std::vector<std::string> _single_input_general_value_aten_funcs = {
static std::vector<std::string> _single_input_general_value_aten_funcs = {
"avg_pool1d",
"avg_pool2d",
"avg_pool3d",
@@ -163,7 +163,7 @@ std::vector<std::string> _single_input_general_value_aten_funcs = {
"leaky_relu_",
};

std::vector<std::string> _clamp_funcs = {
static std::vector<std::string> _clamp_funcs = {
"hardtanh",
"hardtanh_",
"clamp",
@@ -176,7 +176,7 @@ const float _sym_scale = 2.0f / 256.0f;
const int _sym_zero_point = 128;
// quantization parameters for ops with range 0 to 1
// for example: aten/src/ATen/native/quantized/cpu/qsigmoid.cpp
std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =
static std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =
std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
@@ -186,16 +186,17 @@ std::tuple<c10::QScheme, QParamVector> _per_tensor_asym_qparam =

// quantization parameters for ops with range -1 to 1
// for example: aten/src/ATen/native/quantized/cpu/qtanh.cpp
std::tuple<c10::QScheme, QParamVector> _per_tensor_sym_qparam = std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
{std::make_pair(".scale", IValue(_sym_scale)),
std::make_pair(".zero_point", IValue(_sym_zero_point)),
std::make_pair(".scalar_type", IValue(c10::kQUInt8))}));
static std::tuple<c10::QScheme, QParamVector> _per_tensor_sym_qparam =
std::make_tuple(
c10::kPerTensorAffine,
QParamVector(
{std::make_pair(".scale", IValue(_sym_scale)),
std::make_pair(".zero_point", IValue(_sym_zero_point)),
std::make_pair(".scalar_type", IValue(c10::kQUInt8))}));

// Map from aten op symbol to the quantization parameters
// for the ops with fixed quantization parameters
std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
static std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
_fixed_qparams_map = {
{Symbol::aten("hardsigmoid"), _per_tensor_asym_qparam},
{Symbol::aten("hardsigmoid_"), _per_tensor_asym_qparam},
@@ -208,22 +209,26 @@ std::unordered_map<NodeKind, std::tuple<c10::QScheme, QParamVector>>
// Special checks for ops that do not require observers for all input tensors.
// For each operator in this list observers are inserted for the input based
// on the index specified.
AtenFuncArgs _observe_inputs_aten_func = {};
CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};
static AtenFuncArgs _observe_inputs_aten_func = {};
static CallFuncArgs _observe_inputs_call_func = {{"batch_norm", 1}};

// Aten functions for getting tensor information
std::vector<std::string> _tensor_info_funcs = {"size", "len", "dim", "numel"};
static std::vector<std::string> _tensor_info_funcs = {
"size",
"len",
"dim",
"numel"};

// Aten functions whose output will be quantized or not quantized depending
// on input tensor
std::vector<std::string> _propagate_quant_single_input_ops = {"cat"};
static std::vector<std::string> _propagate_quant_single_input_ops = {"cat"};

// Rules are slightly different for binary ops like `aten::add`, for these ops,
// if both of the inputs are Tensor, we'll quantize the output only if both of
// the inputs are quantized
// if the second input is a Scalar, we'll only look at the first input to decide
// if we need to quantize the output
std::vector<std::string> _propagate_quant_binary_ops = {
static std::vector<std::string> _propagate_quant_binary_ops = {
"add",
"add_",
"mul",
@@ -69,7 +69,7 @@ static std::map<int64_t, Value*> InsertSymbolicShapesCompute(
return sym_shape_to_enclosing_graph_value;
}

void insertDynamicShapesGuard(
static void insertDynamicShapesGuard(
const ShapeComputeGraphMapping& shape_mapping,
Node* guarded_node,
bool add_composed_op,
@@ -115,7 +115,7 @@ StrideInput strideInputFromString(const std::string& si) {
// in the runtime guard, strides are serialized as one flat
// vector. stride_inputs_offset indexes into that vector
// where the strides of this tensor begin
inline StrideInput summarizeStrideDim(
static inline StrideInput summarizeStrideDim(
const c10::IntArrayRef sizes,
const c10::IntArrayRef strides,
size_t dim,
@@ -517,7 +517,7 @@ static Operation StaticRuntimeCopyOuts(const Node* node) {
};
}

RegisterOperators SRCopyOuts({
static RegisterOperators SRCopyOuts({
torch::jit::Operator(
prim::StaticRuntimeCopyOuts,
StaticRuntimeCopyOuts,
@@ -529,7 +529,7 @@ RegisterOperators SRCopyOuts({
// and also the that the symbolic shape dimensions are observed.
// For any symbolic dimension we need to set its value on its first
// use and for all subsequent uses check that the values are equal
RegisterOperators reg_guard({
static RegisterOperators reg_guard({
Operator(
"prim::TensorExprDynamicGuard(...) -> bool",
[](const Node* node) -> Operation {
@@ -736,7 +736,7 @@ static Operation createTensorExprDynamicGroup(const Node* node) {
};
}

RegisterOperators TensorExprDynamicOp({
static RegisterOperators TensorExprDynamicOp({
torch::jit::Operator(
prim::TensorExprDynamicGroup,
createTensorExprDynamicGroup,
@@ -1436,7 +1436,7 @@ static Operation createTensorExprOp(const Node* node) {
};
}

RegisterOperators TensorExprOps({
static RegisterOperators TensorExprOps({
torch::jit::Operator(
prim::TensorExprGroup,
createTensorExprOp,
@@ -77,6 +77,7 @@
#include <torch/csrc/jit/passes/utils/check_alias_annotation.h>
#include <torch/csrc/jit/passes/vulkan_rewrite.h>
#include <torch/csrc/jit/passes/xnnpack_rewrite.h>
#include <torch/csrc/jit/python/init.h>
#include <torch/csrc/jit/python/pybind_utils.h>
#include <torch/csrc/jit/python/python_arg_flatten.h>
#include <torch/csrc/jit/python/python_custom_class.h>
@@ -59,7 +59,7 @@ void clear_registered_instances(void* ptr) {
// SymIntList is in fact only ints, and if so, you called this with T=int64_t.
// This precondition is NOT checked at runtime.
template <typename T>
IValue listToIValue(py::handle obj) {
static IValue listToIValue(py::handle obj) {
c10::List<T> rs;
for (auto it = obj.begin(); it != obj.end(); it++) {
auto elm = *it;
@ -26,13 +26,13 @@
|
||||
namespace torch::jit {
|
||||
|
||||
// Controls whether graph source ranges are printed by default
|
||||
bool global_print_source_ranges = true;
|
||||
static bool global_print_source_ranges = true;
|
||||
|
||||
Symbol ConcretePythonOp::Kind = prim::PythonOp;
|
||||
|
||||
using c10::Type;
|
||||
|
||||
std::string getPythonName(const PyObject* obj_) {
|
||||
static std::string getPythonName(const PyObject* obj_) {
|
||||
pybind11::gil_scoped_acquire gil;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
||||
PyObject* obj = const_cast<PyObject*>(obj_);
|
||||
@ -41,7 +41,7 @@ std::string getPythonName(const PyObject* obj_) {
|
||||
return py::str(v);
|
||||
}
|
||||
|
||||
std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
static std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
pybind11::gil_scoped_acquire gil;
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
|
||||
auto pyobj = py::handle(const_cast<PyObject*>(obj.get()));
|
||||
@ -81,7 +81,7 @@ std::ostream& printPyObject(std::ostream& out, const THPObjectPtr& obj) {
|
||||
}
|
||||
}
|
||||
|
||||
Node* findNode(
|
||||
static Node* findNode(
|
||||
c10::ArrayRef<torch::jit::Block*> blocks,
|
||||
Symbol kind,
|
||||
bool recurse = true) {
|
||||
@ -101,7 +101,7 @@ Node* findNode(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
Node* findNode(Block* block, Symbol kind, bool recurse = true) {
|
||||
static Node* findNode(Block* block, Symbol kind, bool recurse = true) {
|
||||
std::vector<Block*> blocks = {block};
|
||||
return findNode(blocks, kind, recurse);
|
||||
}
|
||||
|
@ -381,7 +381,7 @@ SugaredValuePtr ModuleValue::getitem(
|
||||
<< "ParameterList, and ParameterDict modules are subscriptable");
|
||||
}
|
||||
|
||||
void checkInterface(
|
||||
static void checkInterface(
|
||||
const SourceRange& loc,
|
||||
GraphFunction& m,
|
||||
const std::shared_ptr<ModuleValue>& self,
|
||||
@ -582,7 +582,7 @@ std::shared_ptr<SugaredValue> SugaredDict::attr(
|
||||
TORCH_INTERNAL_ASSERT(false);
|
||||
}
|
||||
|
||||
std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
static std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
const py::object& obj,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
@ -595,7 +595,7 @@ std::shared_ptr<SugaredEnumClass> createSugaredEnumClassFromObj(
|
||||
}
|
||||
|
||||
// helper function for instantiating a SugaredValue from an IValue
|
||||
std::shared_ptr<SugaredValue> toSugaredValue(
|
||||
static std::shared_ptr<SugaredValue> toSugaredValue(
|
||||
const IValue& v,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
@ -1057,7 +1057,7 @@ TypePtr registerNamedTuple(
|
||||
return tt;
|
||||
}
|
||||
|
||||
bool isEnumClass(py::object obj) {
|
||||
static bool isEnumClass(py::object obj) {
|
||||
auto enum_type_obj =
|
||||
py::cast<py::object>(py::module::import("enum").attr("Enum"));
|
||||
int ret = PyObject_IsSubclass(obj.ptr(), enum_type_obj.ptr());
|
||||
@ -1068,7 +1068,7 @@ bool isEnumClass(py::object obj) {
|
||||
return ret == 1;
|
||||
}
|
||||
|
||||
std::shared_ptr<SugaredValue> createSimpleEnumValue(
|
||||
static std::shared_ptr<SugaredValue> createSimpleEnumValue(
|
||||
const py::object& obj,
|
||||
GraphFunction& m,
|
||||
const SourceRange& loc) {
|
||||
|
@ -22,7 +22,7 @@ namespace torch::jit::tracer {

// Python interpreter retrieval routine adapted from
// https://stackoverflow.com/a/8706144
std::vector<StackEntry> _pythonCallstack() {
static std::vector<StackEntry> _pythonCallstack() {
pybind11::gil_scoped_acquire gil;
PyFrameObject* frame = PyEval_GetFrame();
Py_XINCREF(frame);
@ -196,11 +196,11 @@ Node* preRecordPythonTrace(
return n;
}

void pythonRecordSourceLocation(Node* n) {
static void pythonRecordSourceLocation(Node* n) {
n->setSourceRange(getPythonInterpreterSourceRange());
}

void pythonWarn(const std::string& reason) {
static void pythonWarn(const std::string& reason) {
pybind11::gil_scoped_acquire gil;
auto warn_class = py::module::import("torch.jit").attr("TracerWarning");
PyErr_WarnEx(warn_class.ptr(), reason.c_str(), 1);

@ -12,7 +12,7 @@ namespace py = pybind11;

namespace torch::jit {

std::optional<std::string> maybeConvertToString(const py::object& obj) {
static std::optional<std::string> maybeConvertToString(const py::object& obj) {
if (obj.is_none()) {
return std::nullopt;
}
@ -58,14 +58,16 @@ struct SourceRangeFactory {
};

template <typename T>
List<T> wrap_list(const SourceRange& fallback_pos, std::vector<T>&& vec) {
static List<T> wrap_list(
const SourceRange& fallback_pos,
std::vector<T>&& vec) {
if (vec.empty())
return List<T>::create(fallback_pos, std::move(vec));
return List<T>::create(vec.front().range(), std::move(vec));
}

template <typename T>
Maybe<T> wrap_maybe(const SourceRange& fallback_pos, T* val) {
static Maybe<T> wrap_maybe(const SourceRange& fallback_pos, T* val) {
return val ? Maybe<T>::create(val->range(), *val)
: Maybe<T>::create(fallback_pos);
}

@ -246,7 +246,7 @@ FunctionDefaults calcOverloadedFunctionDefaults(

} // namespace

bool checkMutableFunctionDefault(const py::object& def_arg) {
static bool checkMutableFunctionDefault(const py::object& def_arg) {
if (py::isinstance<py::list>(def_arg) || py::isinstance<py::dict>(def_arg)) {
return true;
}
@ -262,7 +262,7 @@ bool checkMutableFunctionDefault(const py::object& def_arg) {
return false;
}

void checkMutableFunctionDefault(
static void checkMutableFunctionDefault(
const SourceRange& range,
const Argument& arg,
const py::object& def_arg) {
@ -276,7 +276,7 @@ void checkMutableFunctionDefault(
}
}

FunctionSchema getSchemaWithNameAndDefaults(
static FunctionSchema getSchemaWithNameAndDefaults(
const SourceRange& range,
const FunctionSchema& schema,
const std::optional<std::string>& new_name,
@ -472,7 +472,7 @@ static std::shared_ptr<Graph> _propagate_and_assign_input_shapes(
return retval;
}

void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
static void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
// Make a graph with a fake self argument
auto graph = toGraphFunction(*func.function_).graph()->copy();
auto v = graph->insertInput(0, "self");
@ -484,7 +484,7 @@ void addFunctionToModule(Module& module, const StrongFunctionPtr& func) {
}

// this is used in our test suite to check that we correctly preserved type tags
bool ivalue_tags_match(const Module& lhs, const Module& rhs) {
static bool ivalue_tags_match(const Module& lhs, const Module& rhs) {
struct Work {
IValue a;
IValue b;
@ -605,7 +605,7 @@ struct slot_dict_impl {
};

template <typename T>
py::list debugMakeList(const T& list) {
static py::list debugMakeList(const T& list) {
py::list result;
for (const auto& elem : list) {
result.append(py::cast(elem));
@ -613,7 +613,7 @@ py::list debugMakeList(const T& list) {
return result;
}
template <typename T>
py::list debugMakeNamedList(const T& list) {
static py::list debugMakeNamedList(const T& list) {
py::list result;
for (auto elem : list) {
result.append(py::cast(std::make_pair(elem.name, elem.value)));
@ -621,7 +621,7 @@ py::list debugMakeNamedList(const T& list) {
return result;
}
template <typename T>
py::set debugMakeSet(const T& list) {
static py::set debugMakeSet(const T& list) {
py::set result;
for (const auto& elem : list) {
result.add(py::cast(elem));
@ -674,7 +674,7 @@ struct DeepCopyMemoTable {
std::shared_ptr<IValue::HashIdentityIValueMap> map;
};

IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
static IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
if (!memo.contains(py::str("__torch_script_memo_table"))) {
memo["__torch_script_memo_table"] =
DeepCopyMemoTable{std::make_shared<IValue::HashIdentityIValueMap>()};
@ -684,7 +684,7 @@ IValue pyIValueDeepcopy(const IValue& ivalue, const py::dict& memo) {
return ivalue.deepcopy(ivalue_memo);
}

ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
static ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
ExtraFilesMap r;
for (const auto& it : pydict) {
r[py::cast<std::string>(it.first)] = "";
@ -692,14 +692,16 @@ ExtraFilesMap extra_files_from_python(const py::dict& pydict) {
return r;
}

void extra_files_to_python(const ExtraFilesMap& m, const py::dict& pydict) {
static void extra_files_to_python(
const ExtraFilesMap& m,
const py::dict& pydict) {
// py::dict is pointer-like type so it gets modified despite const&
for (const auto& it : m) {
pydict[py::str(it.first)] = py::bytes(it.second);
}
}

void pyCompilationUnitDefine(
static void pyCompilationUnitDefine(
CompilationUnit& cu,
const std::string& src,
const ResolutionCallback* rcb,

@ -2,7 +2,7 @@

namespace torch::jit {

thread_local bool kOptimize = true;
static thread_local bool kOptimize = true;
void setGraphExecutorOptimize(bool o) {
kOptimize = o;
}

@ -165,8 +165,8 @@ struct JitDecomp final : torch::autograd::impl::JitDecompInterface {
torch::jit::Stack* stack) const override;
};

JitDecomp jitDecomp;
torch::autograd::impl::JitDecompRegisterer registerJitDecomp(&jitDecomp);
static JitDecomp jitDecomp;
static torch::autograd::impl::JitDecompRegisterer registerJitDecomp(&jitDecomp);

void JitDecomp::run_jit_decomposition(
const c10::OperatorHandle& op,

@ -82,7 +82,7 @@ c10::AliasAnalysisKind aliasAnalysisInternalSpecialCase() {
// for debugging it is helpful to be able to force autodiff subgraphs
// to be created, to check their correctness, even when the
// size of the of the subgraph is too small to be profitable.
thread_local bool autodiff_subgraph_inlining = true;
static thread_local bool autodiff_subgraph_inlining = true;
void debugSetAutodiffSubgraphInlining(bool state) {
autodiff_subgraph_inlining = state;
}
@ -102,7 +102,7 @@ bool getFusionGroupInlining() {
return fusion_group_inlining;
}

thread_local std::weak_ptr<Graph> last_executed_optimized_graph;
static thread_local std::weak_ptr<Graph> last_executed_optimized_graph;
std::shared_ptr<Graph> lastExecutedOptimizedGraph() {
return last_executed_optimized_graph.lock();
}
@ -542,7 +542,7 @@ Gradient getGradient(const Node* n) {
}
} // anonymous namespace

RegisterOperators reg_graph_executor_ops({Operator(
static RegisterOperators reg_graph_executor_ops({Operator(
prim::DifferentiableGraph,
[](const Node* n) -> Operation {
return DifferentiableGraphOp(getGradient(n));

@ -106,7 +106,7 @@ inline int64_t getDistAutogradContextId() {
}
} // namespace

thread_local InterpreterStateImpl* tls_int_state_ptr_ = nullptr;
static thread_local InterpreterStateImpl* tls_int_state_ptr_ = nullptr;
struct TLSCurrentInterpreterGuard {
TLSCurrentInterpreterGuard(InterpreterStateImpl* state)
: prev_state_(tls_int_state_ptr_) {

@ -42,7 +42,7 @@ void LockingLogger::setAggregationType(
agg_types[stat_name] = type;
}

std::atomic<LoggerBase*> global_logger{new NoopLogger()};
static std::atomic<LoggerBase*> global_logger{new NoopLogger()};

LoggerBase* getLogger() {
return global_logger.load();

@ -16,7 +16,7 @@

namespace torch::jit {

std::string shape_funcs = ""
static std::string shape_funcs = ""
+ std::string(R"=====(
def unary(self: List[int]) -> List[int]:
out = annotate(List[int], [])

@ -19,7 +19,7 @@

namespace torch::jit {

void createFusionGroups(Block* block, AliasDb* aliasDb, size_t min_size);
static void createFusionGroups(Block* block, AliasDb* aliasDb, size_t min_size);

void fuseStaticSubgraphs(std::shared_ptr<Graph> graph, size_t min_size) {
Inline(*graph);
@ -60,7 +60,7 @@ static Operation createStaticSubgraphRuntime(const Node* node) {
};
}

RegisterOperators StaticSubgraphOps({torch::jit::Operator(
static RegisterOperators StaticSubgraphOps({torch::jit::Operator(
prim::StaticSubgraph,
createStaticSubgraphRuntime,
AliasAnalysisKind::INTERNAL_SPECIAL_CASE)});

@ -956,7 +956,7 @@ std::vector<std::string> export_opnames(const script::Module& m) {
// Thread local flag (only happens in export, i.e. on server side)
// to control if instructions for bytecode default inputs are emitted
// or not. It's the major difference between bytecode v5 and v6.
thread_local bool emitBytecodeDefaultInputs =
static thread_local bool emitBytecodeDefaultInputs =
caffe2::serialize::kProducedBytecodeVersion <= 5 ? true : false;
bool BytecodeEmitMode::is_default_value_for_unspecified_arg_enabled() {
return emitBytecodeDefaultInputs;
@ -966,7 +966,7 @@ void BytecodeEmitMode::set_default_value_for_unspecified_arg_enabled(
emitBytecodeDefaultInputs = enabled;
}

thread_local bool emitDefautlArgsWithOutArgs =
static thread_local bool emitDefautlArgsWithOutArgs =
caffe2::serialize::kProducedBytecodeVersion <= 6 ? false : true;
bool BytecodeEmitMode::is_default_args_before_out_args_enabled() {
return emitDefautlArgsWithOutArgs;
@ -975,7 +975,7 @@ void BytecodeEmitMode::set_default_args_before_out_args_enabled(bool enabled) {
emitDefautlArgsWithOutArgs = enabled;
}

thread_local bool emitDefaultEmitPromotedOps =
static thread_local bool emitDefaultEmitPromotedOps =
caffe2::serialize::kProducedBytecodeVersion <= 7 ? false : true;
bool BytecodeEmitMode::is_emit_promoted_ops_enabled() {
return emitDefaultEmitPromotedOps;

@ -13,7 +13,7 @@ namespace torch::jit {
// "Whether to emit compact debug_pkl when saving a model to .pt file."
// "Compact file is smaller but cannot be loaded by old torch binaries."
// TODO(qihan) remove when all binaries are using string table.
thread_local bool should_use_format_with_string_table_ = true;
static thread_local bool should_use_format_with_string_table_ = true;

class SourceRangeSerializer {
public:

@ -270,18 +270,6 @@ void Unpickler::setInput(size_t memo_id) {
}
}

// emplace_back on bool vectors does not exist on some systems
// avoid it by calling push_back for bool
template <typename T>
inline void append(std::vector<T>& a, T&& e) {
a.emplace_back(std::forward<T>(e));
}
template <>
// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved)
inline void append<bool>(std::vector<bool>& a, bool&& e) {
a.push_back(e);
}

static std::vector<int64_t> tupleToIntList(const IValue& v) {
return fmap(v.toTupleRef().elements(), [](const IValue& v) -> int64_t {
return v.toInt();
@ -1189,7 +1177,7 @@ void Unpickler::readList(IValue list_ivalue) {
readListElements(std::move(list_ivalue), start);
}

inline bool is_valid_python_id_char(char c) {
static inline bool is_valid_python_id_char(char c) {
return c == '_' || c == '.' || (c >= '0' && c <= '9') ||
(c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}

@ -363,6 +363,6 @@ void BlockCodeGen::call_raw(const std::vector<void*>& args) {
}

BlockCodeGen::~BlockCodeGen() = default;
RegisterCodeGen<BlockCodeGen> block_codegen_reg("block_codegen");
static RegisterCodeGen<BlockCodeGen> block_codegen_reg("block_codegen");

} // namespace torch::jit::tensorexpr

@ -18,7 +18,7 @@ namespace torch::jit::tensorexpr {
using namespace analysis;

template <typename Container>
BoundsInfo mergeTensorAccesses(
static BoundsInfo mergeTensorAccesses(
const Container& accesses,
const std::unordered_map<VarPtr, BufPtr>& varToBuf,
bool distinctAccessKinds) {

@ -77,7 +77,7 @@ void CppPrinter::printPrologue() {
}

template <typename T>
inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
static inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs) {
@ -85,7 +85,7 @@ inline std::enable_if_t<!std::is_floating_point_v<T>, void> visit_mod(
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
static inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs) {
@ -93,35 +93,35 @@ inline std::enable_if_t<std::is_floating_point_v<T>, void> visit_mod(
}

template <typename T>
inline std::
static inline std::
enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, void>
visit_max(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "std::max(" << *lhs << ", " << *rhs << ")";
}

template <typename T>
inline std::
static inline std::
enable_if_t<!std::is_floating_point_v<T> && !std::is_integral_v<T>, void>
visit_max(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "(" << *lhs << " < " << *rhs << ") ? " << *rhs << " : " << *lhs;
}

template <typename T>
inline std::
static inline std::
enable_if_t<std::is_floating_point_v<T> || std::is_integral_v<T>, void>
visit_min(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << "std::min(" << *lhs << ", " << *rhs << ")";
}

template <typename T>
inline std::
static inline std::
enable_if_t<!std::is_floating_point_v<T> && !std::is_integral_v<T>, void>
visit_min(std::ostream& os, const ExprPtr& lhs, const ExprPtr& rhs) {
os << *lhs << " < " << *rhs << " ? " << *lhs << " : " << *rhs;
}

template <typename T>
void visit_binary_op(
static void visit_binary_op(
std::ostream& os,
const ExprPtr& lhs,
const ExprPtr& rhs,
@ -142,7 +142,7 @@ void visit_binary_op(
}

template <typename Op>
void dispatch_binary_op(std::ostream& os, const BinaryOpNode<Op>* v) {
static void dispatch_binary_op(std::ostream& os, const BinaryOpNode<Op>* v) {
switch (v->lhs()->dtype().scalar_type()) {
#define TYPE_CASE(Type, Name) \
case ScalarType::Name: \
@ -400,6 +400,6 @@ void CppCodeGen::call_raw(const std::vector<void*>& args) {
os() << "int main() {}" << '\n';
}

RegisterCodeGen<CppCodeGen> cpp_codegen_reg("cpp_codegen");
static RegisterCodeGen<CppCodeGen> cpp_codegen_reg("cpp_codegen");

} // namespace torch::jit::tensorexpr

@ -10,7 +10,7 @@

namespace torch::jit::tensorexpr {

RegisterCodeGen<SimpleIREvaluator> ir_eval_codegen_reg("simple_ir_eval");
static RegisterCodeGen<SimpleIREvaluator> ir_eval_codegen_reg("simple_ir_eval");

int64_t InterpValue::intValue() const {
#define TYPE_CASE(Type, Name) \
@ -24,43 +24,42 @@ int64_t InterpValue::intValue() const {
}

template <typename T>
inline std::enable_if_t<std::is_integral_v<T>, T> mod_value(T lhs, T rhs) {
static inline std::enable_if_t<std::is_integral_v<T>, T> mod_value(
T lhs,
T rhs) {
return lhs % rhs;
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, T> mod_value(
static inline std::enable_if_t<std::is_floating_point_v<T>, T> mod_value(
T lhs,
T rhs) {
return std::fmod(lhs, rhs);
}

inline bool mod_value(bool lhs, bool rhs) {
static inline bool mod_value(bool lhs, bool rhs) {
throw std::runtime_error("Attempted modulus of bool");
}

template <typename T>
inline std::enable_if_t<std::is_integral_v<T>, T> div_value(T lhs, T rhs) {
static inline std::enable_if_t<std::is_integral_v<T>, T> div_value(
T lhs,
T rhs) {
TORCH_CHECK(rhs != 0, "Division by zero");
return lhs / rhs;
}

template <typename T>
inline std::enable_if_t<std::is_floating_point_v<T>, T>
static inline std::enable_if_t<std::is_floating_point_v<T>, T>
__ubsan_ignore_float_divide_by_zero__ div_value(T lhs, T rhs) {
return lhs / rhs;
}

inline bool div_value(bool lhs, bool rhs) {
LOG(FATAL) << "Attempted division of bool";
return false;
}

inline c10::Half div_value(c10::Half lhs, c10::Half rhs) {
static inline c10::Half div_value(c10::Half lhs, c10::Half rhs) {
return lhs / rhs;
}

inline c10::BFloat16 div_value(c10::BFloat16 lhs, c10::BFloat16 rhs) {
static inline c10::BFloat16 div_value(c10::BFloat16 lhs, c10::BFloat16 rhs) {
return lhs / rhs;
}

@ -60,7 +60,7 @@ template <
std::enable_if_t<std::is_same_v<
decltype(detail::bin_op_deducer(std::declval<Op>())),
void>>* = nullptr>
void visitBinaryOp(
static void visitBinaryOp(
NodePtr<Op> v,
const std::string& op_str,
IRPrinter* printer,

@ -8,7 +8,7 @@
namespace torch::jit::tensorexpr {

// Creates a new Expr of the given type with the provided lhs and rhs.
inline ExprPtr newBinaryOpOfType(
static inline ExprPtr newBinaryOpOfType(
IRNodeType expr_type,
const ExprPtr& lhs,
const ExprPtr& rhs,
@ -72,7 +72,7 @@ static ExprPtr mutateBinaryOp(

// Simple recursive GCD.
template <typename T>
T gcd(T a, T b) {
static T gcd(T a, T b) {
if (b == 0) {
return a;
}
@ -205,7 +205,7 @@ void MinTerm::uniquefy() {

// Handles optimization cases for Broadcast/Ramp +/- Broadcast/Ramp
template <class Op>
ExprPtr combineMultilane(const ExprPtr& lhs, const ExprPtr& rhs) {
static ExprPtr combineMultilane(const ExprPtr& lhs, const ExprPtr& rhs) {
if (BroadcastPtr bc = to<Broadcast>(lhs)) {
if (BroadcastPtr bcother = to<Broadcast>(rhs)) {
if (bc->lanes() != bcother->lanes()) {

@ -19,7 +19,7 @@ template <
std::enable_if_t<
std::is_same_v<decltype(detail::deducer(std::declval<D>())), void>>* =
nullptr>
void verifyBitwiseOp(NodePtr<D> v, IRVerifier* verifier) {
static void verifyBitwiseOp(NodePtr<D> v, IRVerifier* verifier) {
if (!v->lhs()->dtype().is_integral()) {
throw unsupported_dtype();
}

@ -1242,7 +1242,7 @@ NNCLoweringFunction TensorExprKernel::getCustomLoweringFor(
}

template <typename T>
std::vector<size_t> reverse_sort_indices(const std::vector<T>& v) {
static std::vector<size_t> reverse_sort_indices(const std::vector<T>& v) {
// initialize original index locations
std::vector<size_t> idx(v.size());
iota(idx.begin(), idx.end(), 0);

@ -60,7 +60,7 @@ static std::vector<std::vector<ForPtr>> GetAllPerfectlyNestedLoopNests(
}

template <typename T>
std::tuple<std::vector<T>, std::vector<int>> select_n_randomly(
static std::tuple<std::vector<T>, std::vector<int>> select_n_randomly(
std::vector<T>& objects,
int n,
std::default_random_engine& random_engine) {
@ -100,7 +100,7 @@ static void printHistory(int index, std::string message) {
}

template <typename T>
std::string join(std::vector<T> indices, char sep = ',') {
static std::string join(std::vector<T> indices, char sep = ',') {
std::string s;
for (const auto& index : indices) {
s += std::to_string(index) + sep;
@ -118,7 +118,7 @@ static std::string join(
return s;
}
template <typename T>
std::string indexOf(const std::vector<T>& objects, const T& object) {
static std::string indexOf(const std::vector<T>& objects, const T& object) {
return std::to_string(std::distance(
objects.begin(), std::find(objects.begin(), objects.end(), object)));
}

@ -15,6 +15,7 @@
#include <torch/csrc/jit/tensorexpr/loopnest.h>
#include <torch/csrc/jit/tensorexpr/lowerings.h>
#include <torch/csrc/jit/tensorexpr/reduction.h>
#include <torch/csrc/jit/tensorexpr/tensorexpr_init.h>

#include <utility>

@ -25,7 +26,7 @@ struct pybind11::detail::type_caster<torch::jit::tensorexpr::ArgValue>
namespace torch::jit {
using namespace torch::jit::tensorexpr;

ArgValue convertPyToArgValue(py::handle inp) {
static ArgValue convertPyToArgValue(py::handle inp) {
if (py::isinstance<BufHandle>(inp)) {
return py::cast<BufHandle>(inp);
} else if (py::isinstance<VarHandle>(inp)) {
@ -54,7 +55,7 @@ ArgValue convertPyToArgValue(py::handle inp) {
}
}

Dtype parsePythonDtype(py::handle obj) {
static Dtype parsePythonDtype(py::handle obj) {
if (THPDtype_Check(obj.ptr())) {
return Dtype(reinterpret_cast<THPDtype*>(obj.ptr())->scalar_type);
} else {

@ -86,5 +86,4 @@ std::string& getLTCForceFallback() {
return config;
}

// NOLINTEND(misc-use-internal-linkage)
} // namespace torch::lazy

@ -6,7 +6,6 @@
#include <torch/csrc/lazy/core/ir_metadata.h>

// Enables caching on for dynamic shapes (aka disable hash on shapes)
// NOLINTNEXTLINE(misc-use-internal-linkage)
// clang-format off
C10_DEFINE_bool(
ltc_enable_dynamic_shapes,

@ -4,7 +4,6 @@

#include <utility>

// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
ltc_enable_symbolic_shapes,
false,

@ -1,7 +1,7 @@
#include <torch/csrc/lazy/core/config.h>
#include <torch/csrc/lazy/ts_backend/config.h>

// TODO(whc) unclear if this is useful, has only been tested as true
// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
torch_lazy_ts_tensor_update_sync,
true,
@ -9,7 +9,6 @@ C10_DEFINE_bool(

// TODO(whc) we need to hook up these flags in a more useful way
// possibly also keep LTC_TS_CUDA env working?
// NOLINTNEXTLINE(misc-use-internal-linkage)
C10_DEFINE_bool(
torch_lazy_ts_cuda,
false,

@ -8,10 +8,10 @@
#include <libshm/libshm.h>
#include <libshm/socket.h>

std::unordered_map<std::string, ClientSocket> managers;
std::string manager_executable_path;
static std::unordered_map<std::string, ClientSocket> managers;
static std::string manager_executable_path;

AllocInfo get_alloc_info(const char* filename) {
static AllocInfo get_alloc_info(const char* filename) {
AllocInfo info = {};
info.pid = getpid();
info.free = false;
@ -23,7 +23,7 @@ AllocInfo get_alloc_info(const char* filename) {
return info;
}

void start_manager() {
static void start_manager() {
std::array<int, 2> pipe_ends;
SYSCHECK_ERR_RETURN_NEG1(pipe(pipe_ends.data()));

@ -78,7 +78,7 @@ void start_manager() {
managers.emplace(std::move(handle), std::move(manager));
}

ClientSocket& get_manager_socket(const std::string& manager_handle) {
static ClientSocket& get_manager_socket(const std::string& manager_handle) {
auto it = managers.find(manager_handle);
if (it == managers.end()) {
auto socket = ClientSocket(manager_handle);

@ -32,19 +32,19 @@ struct ClientSession {
pid_t pid;
};

std::vector<struct pollfd> pollfds;
std::unordered_map<int, ClientSession> client_sessions;
static std::vector<struct pollfd> pollfds;
static std::unordered_map<int, ClientSession> client_sessions;
// TODO: check if objects have been freed from time to time
std::set<std::string> used_objects;
static std::set<std::string> used_objects;

void register_fd(int fd) {
static void register_fd(int fd) {
struct pollfd pfd = {};
pfd.fd = fd;
pfd.events = POLLIN;
pollfds.push_back(pfd);
}

void unregister_fd(int fd) {
static void unregister_fd(int fd) {
pollfds.erase(
std::remove_if(
pollfds.begin(),
@ -54,7 +54,7 @@ void unregister_fd(int fd) {
client_sessions.erase(fd);
}

void print_init_message(std::string_view message) {
static void print_init_message(std::string_view message) {
ssize_t written_bytes = -1;
while (!message.empty()) {
// NOLINTNEXTLINE(bugprone-assignment-in-if-condition)
@ -69,7 +69,7 @@ void print_init_message(std::string_view message) {
}
}

bool object_exists(const char* name) {
static bool object_exists(const char* name) {
int fd = shm_open(name, O_RDONLY, 0);
if (fd >= 0) {
close(fd);
@ -79,7 +79,7 @@ bool object_exists(const char* name) {
}
}

void free_used_object(const std::string& name) {
static void free_used_object(const std::string& name) {
if (!object_exists(name.c_str())) {
DEBUG("object %s appears to have been freed", name.c_str());
used_objects.erase(name);