Enable misc-use-internal-linkage check and apply fixes (#148948)
Enables the clang-tidy rule [`misc-use-internal-linkage`](https://clang.llvm.org/extra/clang-tidy/checks/misc/use-internal-linkage.html). The check was introduced in Clang-Tidy 18 and became usable here after the recent update to Clang-Tidy 19. It flags functions and variables that are used only within their translation unit so they can be marked `static`: undesired symbols are then not leaked into other translation units, more link-time optimisations become possible, and the resulting binaries may be smaller. Most detected violations were fixed by marking the symbol `static`. Where a symbol really was consumed by other files, its declaring header was included instead. A few declarations were simply wrong and have been fixed.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/148948
Approved by: https://github.com/Skylion007
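As an illustration of what the check enforces (a minimal sketch with hypothetical names, not code taken from this PR): a helper that is referenced only inside its own `.cpp` file has external linkage by default, so the check suggests giving it internal linkage with `static` (or an unnamed namespace), which keeps the symbol out of the translation unit's exported interface.

```cpp
// example.cpp - hypothetical translation unit used only to illustrate the rule.
#include <cstdint>

// Before: `combine` had external linkage, so its symbol was visible to the
// linker even though no other file uses it.
// uint64_t combine(uint32_t hi, uint32_t lo);

// After: internal linkage. The symbol stays local to this file, cannot clash
// with a same-named symbol elsewhere, and the optimizer is free to inline it.
static uint64_t combine(uint32_t hi, uint32_t lo) {
  return (static_cast<uint64_t>(hi) << 32) | lo;
}

uint64_t make_key(uint32_t hi, uint32_t lo) {
  return combine(hi, lo);  // the only caller lives in this translation unit
}
```

Where a symbol has to keep external linkage (for example the deprecated autocast APIs and the `C10_DEFINE_*` flag macros in the diff below), the fix instead suppresses the warning with `// NOLINTNEXTLINE(misc-use-internal-linkage)`.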
@@ -48,7 +48,6 @@ misc-*,
 -misc-no-recursion,
 -misc-non-private-member-variables-in-classes,
 -misc-unused-using-decls,
--misc-use-internal-linkage,
 modernize-*,
 -modernize-macro-to-enum,
 -modernize-return-braced-init-list,
@@ -69,7 +69,7 @@ Generator createCPUGenerator(uint64_t seed_val) {
 * Helper function to concatenate two 32 bit unsigned int
 * and return them as a 64 bit unsigned int
 */
-inline uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) {
+inline static uint64_t make64BitsFrom32Bits(uint32_t hi, uint32_t lo) {
 return (static_cast<uint64_t>(hi) << 32) | lo;
 }

@@ -588,7 +588,7 @@ Allocator* getCPUAllocator() {
 // means the allow_tf32 flags are overridden and tf32 is force disabled
 // override_allow_tf32_flag = false
 // means the original allow_tf32 flags are followed
-thread_local bool override_allow_tf32_flag = false;
+thread_local static bool override_allow_tf32_flag = false;

 NoTF32Guard::NoTF32Guard() {
 if (!override_allow_tf32_flag) {
@@ -611,7 +611,7 @@ bool NoTF32Guard::should_disable_tf32() {
 // This information can be used, for example, to select implementations
 // with different numerical or performance characteristics.
 // See https://pytorch.org/docs/stable/notes/numerical_accuracy.html for details.
-thread_local bool rocm_is_backward_pass;
+thread_local static bool rocm_is_backward_pass;

 ROCmBackwardPassGuard::ROCmBackwardPassGuard() {
 rocm_is_backward_pass = true;
@@ -172,7 +172,7 @@ SymInt computeStorageNbytes(
 }

 template <typename T>
-TensorBase _empty_generic(
+static TensorBase _empty_generic(
 ArrayRef<T> size,
 c10::Allocator* allocator,
 c10::DispatchKeySet ks,
@@ -225,7 +225,7 @@ TensorBase empty_generic_symint(
 }

 template <typename T>
-TensorBase _empty_strided_generic(
+static TensorBase _empty_strided_generic(
 T size,
 T stride,
 c10::Allocator* allocator,
@@ -59,7 +59,7 @@ SymDimVector infer_size_symdimvector(SymIntArrayRef a, SymIntArrayRef b) {
 }

 template<typename Container>
-C10_ALWAYS_INLINE InferExpandGeometryResult<Container> inferExpandGeometryImpl(
+C10_ALWAYS_INLINE static InferExpandGeometryResult<Container> inferExpandGeometryImpl(
 IntArrayRef tensor_sizes,
 IntArrayRef tensor_strides,
 IntArrayRef sizes) {
@@ -737,7 +737,7 @@ bool isFunctionalTensor(const c10::List<::std::optional<Tensor>>& t_list) {
 }

 template <typename T>
-bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
+static bool isFunctionalTensorIListRef(c10::IListRef<T> list) {
 if (list.size() == 0) return false;
 auto functional_count = 0;
 for (const auto& tensor : list) {
@@ -803,7 +803,7 @@ void set_sizes_strides_offset(const std::vector<Tensor>& outs, const std::vector
 }
 }

-thread_local bool _functionalizationReapplyViews;
+thread_local static bool _functionalizationReapplyViews;

 bool getFunctionalizationReapplyViewsTLS() {
 return _functionalizationReapplyViews;
@@ -2,7 +2,7 @@

 namespace at::impl {

-thread_local int64_t VmapMode_current_vmap_level = 0;
+thread_local static int64_t VmapMode_current_vmap_level = 0;

 int64_t VmapMode::current_vmap_level() {
 return VmapMode_current_vmap_level;
@@ -71,7 +71,7 @@ c10::DispatchKeySet get_view_key_set(const at::Tensor& base) {

 namespace at::native {

-inline std::vector<int64_t> construct_opt_sizes(const at::Tensor& sizes) {
+inline static std::vector<int64_t> construct_opt_sizes(const at::Tensor& sizes) {
 // torch.tensor([]) is considered to have `dim() = 1` and `size(0) = 0`
 // torch.nested_tensor([]) should also has `dim() = 1` and `size(0) = 0`
 if (sizes.dim() == 0) {
@@ -5,7 +5,7 @@ namespace at {

 // See TensorGeometry.h on why this is useful now that we cache is_contiguous.
 template <typename T>
-bool _geometry_is_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides) {
+static bool _geometry_is_contiguous(ArrayRef<T> sizes, ArrayRef<T> strides) {
 assert(!overflows<std::int64_t>(sizes.size()));
 auto dim = static_cast<std::int64_t>(sizes.size());
 T expected_stride = 1;
@@ -327,7 +327,7 @@ std::vector<int64_t> defaultStrides(IntArrayRef sizes) {
 // see overloads of computeStride() below.
 //
 template <typename ResultVec, typename NewShapeVec, typename Numel>
-inline std::optional<ResultVec> computeStride_impl(
+inline static std::optional<ResultVec> computeStride_impl(
 const NewShapeVec& oldshape,
 const NewShapeVec& oldstride,
 const NewShapeVec& newshape,
@@ -20,12 +20,12 @@ namespace at {
 // We haven't made a decision on that yet so we are temporarily banning random
 // operations inside of vmap while we gather user feedback.

-template <typename... Args> Tensor unsupportedRandomOp(Args... args) {
+template <typename... Args> static Tensor unsupportedRandomOp(Args... args) {
 TORCH_CHECK(false, "vmap: We do not yet support calling random operations inside of vmap. ",
 "Please perform random operations outside of vmap as a workaround");
 }

-template <typename... Args> Tensor& unsupportedRandomOp_(Args... args) {
+template <typename... Args> static Tensor& unsupportedRandomOp_(Args... args) {
 TORCH_CHECK(false, "vmap: We do not yet support calling random operations inside of vmap. ",
 "Please perform random operations outside of vmap as a workaround");
 }
@@ -123,6 +123,7 @@ TORCH_API inline void set_autocast_gpu_dtype(at::ScalarType dtype) {
 _(privateuseone, at::kPrivateUse1)

 // deprecated other backend specific autocast APIs
+// NOLINTNEXTLINE(misc-use-internal-linkage)
 AT_FORALL_DEPRECATED_AUTOCAST_BACKENDS(DECLARE_DEPRECATED_AUTOCAST_APIS)

 const std::array<at::DeviceType, 9> _AUTOCAST_SUPPORTED_DEVICES{
@@ -43,7 +43,7 @@ std::string toString(const Scalar& s) {
 namespace at {

 //not all C++ compilers have default float so we define our own here
-inline std::ios_base& defaultfloat(std::ios_base& __base) {
+inline static std::ios_base& defaultfloat(std::ios_base& __base) {
 __base.unsetf(std::ios_base::floatfield);
 return __base;
 }
@@ -42,7 +42,7 @@ static std::vector<at::OptionalTensorRef> get_unboxed_opt_tensor_vector() {
 }

 template <typename T>
-void check_elements_same(at::ITensorListRef list, const T& thing, int use_count) {
+static void check_elements_same(at::ITensorListRef list, const T& thing, int use_count) {
 EXPECT_EQ(thing.size(), list.size());
 size_t i = 0;
 for (const auto& t : list) {
@@ -5,7 +5,7 @@

 namespace at {

-thread_local bool NamesMode_enabled = true;
+thread_local static bool NamesMode_enabled = true;

 bool NamesMode::is_enabled() {
 return NamesMode_enabled;
@@ -329,7 +329,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<


 template <typename Dtype>
-inline void bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
+static inline void bgemm_internal_cublaslt(CUDABLAS_BGEMM_ARGTYPES(Dtype)) {
 cudaDataType_t abcType = CUDA_R_32F;
 cublasComputeType_t computeType = CUBLAS_COMPUTE_32F;
 cudaDataType_t scaleType = CUDA_R_32F;
@@ -12,7 +12,7 @@
 namespace at::functorch {

 template <typename Func>
-std::tuple<Tensor, std::optional<int64_t>,Tensor, std::optional<int64_t>>
+static std::tuple<Tensor, std::optional<int64_t>,Tensor, std::optional<int64_t>>
 max_pool_with_indices_batch_rule_helper(
 const Tensor& self, std::optional<int64_t> self_bdim,
 IntArrayRef kernel_size, IntArrayRef stride,
@@ -20,7 +20,7 @@
 namespace at::functorch {

 template <typename F, F Func, typename... ExtraArgs>
-Tensor random_batching_rule(SymIntArrayRef shape, ExtraArgs... extra_args) {
+static Tensor random_batching_rule(SymIntArrayRef shape, ExtraArgs... extra_args) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
@@ -37,7 +37,7 @@ Tensor random_batching_rule(SymIntArrayRef shape, ExtraArgs... extra_args) {
 }

 template <typename F, F Func, typename... ExtraArgs>
-Tensor& random_inplace_batching_rule(Tensor& self, ExtraArgs... extra_args) {
+static Tensor& random_inplace_batching_rule(Tensor& self, ExtraArgs... extra_args) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 TORCH_INTERNAL_ASSERT(maybe_layer.has_value());
@@ -108,7 +108,7 @@ static Tensor& bernoulli_inplace_Tensor_batching_rule(Tensor& self, const Tensor
 }

 template <typename F, F Func, typename... ExtraArgs>
-Tensor randperm_batching_rule(int64_t n, ExtraArgs... extra_args) {
+static Tensor randperm_batching_rule(int64_t n, ExtraArgs... extra_args) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 auto const batch_size = maybe_layer->batchSize();
@@ -127,7 +127,7 @@ Tensor randperm_batching_rule(int64_t n, ExtraArgs... extra_args) {
 }

 template <typename F, F Func, typename... ExtraArgs>
-Tensor unary_pointwise_random_batch_rule(const Tensor& tensor, ExtraArgs... extra_args) {
+static Tensor unary_pointwise_random_batch_rule(const Tensor& tensor, ExtraArgs... extra_args) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 const auto cur_level = maybe_layer->layerId();
@@ -153,7 +153,7 @@ Tensor unary_pointwise_random_batch_rule(const Tensor& tensor, ExtraArgs... extr
 }

 template<typename F, F Func, typename... ExtraArgs>
-Tensor tensor_like_random_batch_rule(const Tensor& self, ExtraArgs... extra_args) {
+static Tensor tensor_like_random_batch_rule(const Tensor& self, ExtraArgs... extra_args) {
 c10::impl::ExcludeDispatchKeyGuard guard(DispatchKey::FuncTorchVmapMode);
 auto maybe_layer = maybeCurrentDynamicLayer();
 const auto cur_level = maybe_layer->layerId();
@@ -272,7 +272,7 @@ struct RandomBatchRuleHelper<F, Func, typelist<T1, T...>> {
 };

 template <typename F, F Func, typename... T>
-Tensor rand_int_wrapper(SymIntArrayRef shape, c10::SymInt high, T... extra_args) {
+static Tensor rand_int_wrapper(SymIntArrayRef shape, c10::SymInt high, T... extra_args) {
 return Func(high, shape, std::forward<T>(extra_args)...);
 }

@@ -299,7 +299,7 @@ struct RandIntBatchRuleHelper<F, Func, typelist<T1, T2, T...>> {
 };

 template <typename F, F Func, typename T0, typename T1, typename... T>
-Tensor rand_int_low_wrapper(SymIntArrayRef shape, T0 scalar0, T1 scalar1, T... extra_args) {
+static Tensor rand_int_low_wrapper(SymIntArrayRef shape, T0 scalar0, T1 scalar1, T... extra_args) {
 return Func(scalar0, scalar1, shape, std::forward<T>(extra_args)...);
 }

@@ -346,7 +346,7 @@ struct NormalPointwiseBatchRule<F, Func, typelist<A0, T...>> {
 };

 template<typename F, F Func, typename... T>
-Tensor normal_wrapper(const Tensor& tensor, double scalar, T... extra_args) {
+static Tensor normal_wrapper(const Tensor& tensor, double scalar, T... extra_args) {
 return Func(scalar, tensor, extra_args...);
 }

@@ -19,7 +19,7 @@

 namespace at::functorch {

-bool kVmapFallbackWarningEnabled = true;
+static bool kVmapFallbackWarningEnabled = true;

 bool isVmapFallbackWarningEnabled() {
 return kVmapFallbackWarningEnabled;
@@ -29,7 +29,7 @@ void setVmapFallbackWarningEnabled(bool enabled) {
 kVmapFallbackWarningEnabled = enabled;
 }

-bool kVmapFallbackEnabled = true;
+static bool kVmapFallbackEnabled = true;

 bool isVmapFallbackEnabled() {
 return kVmapFallbackEnabled;
@ -5,6 +5,7 @@
|
||||
|
||||
#include <ATen/native/mkldnn/xpu/detail/Attr.h>
|
||||
#include <ATen/native/mkldnn/xpu/detail/Utils.h>
|
||||
#include <ATen/native/mkldnn/xpu/detail/oneDNN.h>
|
||||
#include <ATen/native/mkldnn/xpu/detail/oneDNNContext.h>
|
||||
|
||||
#include <oneapi/dnnl/dnnl.hpp>
|
||||
|
@ -19,7 +19,7 @@ static inline c10::ScalarType qconv_decide_out_dtype(
|
||||
return dst_dtype;
|
||||
}
|
||||
|
||||
at::Tensor qconv_prepack_xpu(
|
||||
static at::Tensor qconv_prepack_xpu(
|
||||
at::Tensor weight,
|
||||
at::Tensor weight_scales,
|
||||
double input_scale,
|
||||
|
@ -19,7 +19,7 @@ static inline c10::ScalarType qlinear_decide_out_dtype(
|
||||
return dst_dtype;
|
||||
}
|
||||
|
||||
Tensor q_linear_pointwise(
|
||||
static Tensor q_linear_pointwise(
|
||||
Tensor act,
|
||||
double act_scale,
|
||||
int64_t act_zero_point,
|
||||
@ -78,7 +78,7 @@ Tensor q_linear_pointwise(
|
||||
return qout;
|
||||
}
|
||||
|
||||
Tensor q_linear_pointwise_tensor(
|
||||
static Tensor q_linear_pointwise_tensor(
|
||||
Tensor act,
|
||||
Tensor act_scale,
|
||||
Tensor act_zero_point,
|
||||
@ -137,7 +137,7 @@ Tensor q_linear_pointwise_tensor(
|
||||
return qout;
|
||||
}
|
||||
|
||||
Tensor q_linear_pointwise_binary(
|
||||
static Tensor q_linear_pointwise_binary(
|
||||
Tensor act,
|
||||
double act_scale,
|
||||
int64_t act_zero_point,
|
||||
@ -208,7 +208,7 @@ Tensor q_linear_pointwise_binary(
|
||||
return dim == 3 ? qout.reshape({act.size(0), -1, N}) : qout;
|
||||
}
|
||||
|
||||
Tensor q_linear_pointwise_binary_tensor(
|
||||
static Tensor q_linear_pointwise_binary_tensor(
|
||||
Tensor act,
|
||||
Tensor act_scale,
|
||||
Tensor act_zero_point,
|
||||
@ -248,7 +248,7 @@ Tensor q_linear_pointwise_binary_tensor(
|
||||
unary_post_op_algorithm);
|
||||
}
|
||||
|
||||
at::Tensor q_linear_prepack_onednn(
|
||||
static at::Tensor q_linear_prepack_onednn(
|
||||
at::Tensor weight,
|
||||
std::optional<torch::List<int64_t>> input_shape) {
|
||||
at::Tensor weight_transposed = weight.transpose(0, 1);
|
||||
|
@ -72,7 +72,7 @@ static get_elementwise_nested_tensor_impl(
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
Tensor NestedTensor_elementwise_Tensor(
|
||||
static Tensor NestedTensor_elementwise_Tensor(
|
||||
const Tensor& self,
|
||||
const Tensor& other,
|
||||
const std::string& op_name,
|
||||
@ -234,7 +234,7 @@ Tensor NestedTensor_masked_fill(
|
||||
|
||||
|
||||
template <typename Func>
|
||||
Tensor& NestedTensor_elementwise__Tensor(
|
||||
static Tensor& NestedTensor_elementwise__Tensor(
|
||||
Tensor& self,
|
||||
const Tensor& other,
|
||||
const std::string& op_name,
|
||||
|
@ -289,7 +289,9 @@ void ProfiledCPUMemoryReporter::OutOfMemory(size_t nbytes) {
|
||||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_API at::Allocator* cpu_caching_alloc = nullptr;
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_API uint8_t cpu_caching_alloc_priority = 0;
|
||||
|
||||
void SetCPUCachingAllocator(Allocator* alloc, uint8_t priority) {
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
namespace c10::impl {
|
||||
|
||||
thread_local std::atomic<bool> hermeticPyObjectState{false};
|
||||
thread_local static std::atomic<bool> hermeticPyObjectState{false};
|
||||
|
||||
std::atomic<bool> HermeticPyObjectTLS::haveState_{false};
|
||||
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
namespace c10::impl {
|
||||
|
||||
thread_local PyInterpreter* pythonDispatcherState;
|
||||
thread_local static PyInterpreter* pythonDispatcherState;
|
||||
|
||||
void PythonDispatcherTLS::set_state(PyInterpreter* state) {
|
||||
if (state) {
|
||||
|
@ -8,7 +8,7 @@
|
||||
|
||||
namespace c10::impl {
|
||||
|
||||
thread_local TorchDispatchModeTLS torchDispatchModeState;
|
||||
thread_local static TorchDispatchModeTLS torchDispatchModeState;
|
||||
|
||||
bool TorchDispatchModeTLS::any_modes_set(bool skip_infra_modes) {
|
||||
if (!torchDispatchModeState.stack_.empty())
|
||||
|
@ -19,11 +19,13 @@
|
||||
#endif
|
||||
|
||||
// TODO: rename flags to C10
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_zero_fill,
|
||||
false,
|
||||
"If set, do memory zerofilling when allocating on CPU")
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_bool(
|
||||
caffe2_cpu_allocator_do_junk_fill,
|
||||
false,
|
||||
|
@ -41,6 +41,7 @@ TORCH_SDT_DEFINE_SEMAPHORE(free)
|
||||
|
||||
namespace c10 {
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_REGISTRY(FreeCudaMemoryCallbacksRegistry, FreeMemoryCallback)
|
||||
|
||||
namespace cuda::CUDACachingAllocator {
|
||||
@ -3942,6 +3943,7 @@ void local_raw_delete(void* ptr) {
|
||||
|
||||
namespace CudaMallocAsync {
|
||||
// If this is put in its own header file, it gets incorrectly renamed in HIPify.
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
CUDAAllocator* allocator();
|
||||
|
||||
} // namespace CudaMallocAsync
|
||||
|
@ -178,6 +178,7 @@ static bool dummyHasPrimaryContext([[maybe_unused]] DeviceIndex device_index) {
|
||||
static bool (*hasPrimaryContext)(DeviceIndex) = dummyHasPrimaryContext;
|
||||
|
||||
// Private api to be called from CUDAHooks.cpp
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_CUDA_API void setHasPrimaryContext(bool (*func)(DeviceIndex)) {
|
||||
hasPrimaryContext = func ? func : dummyHasPrimaryContext;
|
||||
}
|
||||
|
@ -920,11 +920,13 @@ static CudaMallocAsyncAllocator device_allocator;
|
||||
void local_raw_delete(void* ptr) {
|
||||
freeAsync(ptr);
|
||||
}
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
CUDAAllocator* allocator() {
|
||||
return &device_allocator;
|
||||
}
|
||||
|
||||
#else
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
CUDAAllocator* allocator() {
|
||||
TORCH_CHECK(false, "Cannot use CudaMallocAsyncAllocator with cuda < 11.4.");
|
||||
return nullptr;
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
namespace c10::cuda::impl {
|
||||
|
||||
bool has_cuda_gpu() {
|
||||
static bool has_cuda_gpu() {
|
||||
int count = 0;
|
||||
C10_CUDA_IGNORE_ERROR(cudaGetDeviceCount(&count));
|
||||
|
||||
@ -22,9 +22,4 @@ int c10_cuda_test() {
|
||||
return r;
|
||||
}
|
||||
|
||||
// This function is not exported
|
||||
int c10_cuda_private_test() {
|
||||
return 2;
|
||||
}
|
||||
|
||||
} // namespace c10::cuda::impl
|
||||
|
@ -4,13 +4,13 @@
|
||||
namespace test_is_compile_time_function_pointer {
|
||||
static_assert(!c10::is_compile_time_function_pointer<void()>::value);
|
||||
|
||||
void dummy() {}
|
||||
static void dummy() {}
|
||||
static_assert(
|
||||
c10::is_compile_time_function_pointer<TORCH_FN_TYPE(dummy)>::value);
|
||||
} // namespace test_is_compile_time_function_pointer
|
||||
|
||||
namespace test_access_through_type {
|
||||
void dummy() {}
|
||||
static void dummy() {}
|
||||
using dummy_ptr = TORCH_FN_TYPE(dummy);
|
||||
static_assert(c10::is_compile_time_function_pointer<dummy_ptr>::value);
|
||||
static_assert(dummy_ptr::func_ptr() == &dummy);
|
||||
@ -18,14 +18,14 @@ static_assert(std::is_same_v<void(), dummy_ptr::FuncType>);
|
||||
} // namespace test_access_through_type
|
||||
|
||||
namespace test_access_through_value {
|
||||
void dummy() {}
|
||||
static void dummy() {}
|
||||
constexpr auto dummy_ptr = TORCH_FN(dummy);
|
||||
static_assert(dummy_ptr.func_ptr() == &dummy);
|
||||
static_assert(std::is_same_v<void(), decltype(dummy_ptr)::FuncType>);
|
||||
} // namespace test_access_through_value
|
||||
|
||||
namespace test_access_through_type_also_works_if_specified_as_pointer {
|
||||
void dummy() {}
|
||||
static void dummy() {}
|
||||
using dummy_ptr = TORCH_FN_TYPE(&dummy);
|
||||
static_assert(c10::is_compile_time_function_pointer<dummy_ptr>::value);
|
||||
static_assert(dummy_ptr::func_ptr() == &dummy);
|
||||
@ -33,14 +33,14 @@ static_assert(std::is_same_v<void(), dummy_ptr::FuncType>);
|
||||
} // namespace test_access_through_type_also_works_if_specified_as_pointer
|
||||
|
||||
namespace test_access_through_value_also_works_if_specified_as_pointer {
|
||||
void dummy() {}
|
||||
static void dummy() {}
|
||||
constexpr auto dummy_ptr = TORCH_FN(&dummy);
|
||||
static_assert(dummy_ptr.func_ptr() == &dummy);
|
||||
static_assert(std::is_same_v<void(), decltype(dummy_ptr)::FuncType>);
|
||||
} // namespace test_access_through_value_also_works_if_specified_as_pointer
|
||||
|
||||
namespace test_run_through_type {
|
||||
int add(int a, int b) {
|
||||
static int add(int a, int b) {
|
||||
return a + b;
|
||||
}
|
||||
using Add = TORCH_FN_TYPE(add);
|
||||
@ -58,11 +58,11 @@ TEST(CompileTimeFunctionPointerTest, runFunctionThroughType) {
|
||||
} // namespace test_run_through_type
|
||||
|
||||
namespace test_run_through_value {
|
||||
int add(int a, int b) {
|
||||
static int add(int a, int b) {
|
||||
return a + b;
|
||||
}
|
||||
template <class Func>
|
||||
int execute(Func, int a, int b) {
|
||||
static int execute(Func, int a, int b) {
|
||||
return Func::func_ptr()(a, b);
|
||||
}
|
||||
|
||||
|
@ -51,7 +51,7 @@ struct Functor {
|
||||
auto lambda = []() {};
|
||||
// func() and func__ just exists to silence a compiler warning about lambda
|
||||
// being unused
|
||||
bool func() {
|
||||
static bool func() {
|
||||
lambda();
|
||||
return true;
|
||||
}
|
||||
@ -151,6 +151,7 @@ struct MyStatelessConstFunctor final {
|
||||
Result operator()(Args...) const {}
|
||||
};
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
void func() {
|
||||
auto stateless_lambda = [](int a) { return a; };
|
||||
static_assert(is_stateless_lambda<decltype(stateless_lambda)>::value, "");
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <c10/util/Flags.h>
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_bool(c10_flags_test_only_flag, true, "Only used in test.");
|
||||
|
||||
namespace c10_test {
|
||||
|
@ -59,7 +59,7 @@ TEST(irange, empty_reverse_range_one_input) {
|
||||
ASSERT_EQ(test_vec, correct);
|
||||
}
|
||||
|
||||
constexpr std::array<int, 3> toy_iota() {
|
||||
static constexpr std::array<int, 3> toy_iota() {
|
||||
std::array<int, 3> result = {0};
|
||||
for (const auto i : c10::irange(3)) {
|
||||
result[i] = i;
|
||||
@ -67,7 +67,7 @@ constexpr std::array<int, 3> toy_iota() {
|
||||
return result;
|
||||
}
|
||||
|
||||
constexpr std::array<int, 3> toy_iota_with_start(int start) {
|
||||
static constexpr std::array<int, 3> toy_iota_with_start(int start) {
|
||||
std::array<int, 3> result = {0};
|
||||
for (const auto i : c10::irange(start, start + 3)) {
|
||||
result[i - start] = i;
|
||||
|
@ -159,19 +159,18 @@ TEST(LoggingDeathTest, TestEnforceUsingFatal) {
|
||||
}
|
||||
#endif
|
||||
|
||||
C10_NOINLINE void f1() {
|
||||
#ifdef FBCODE_CAFFE2
|
||||
static C10_NOINLINE void f1() {
|
||||
CAFFE_THROW("message");
|
||||
}
|
||||
|
||||
C10_NOINLINE void f2() {
|
||||
static C10_NOINLINE void f2() {
|
||||
f1();
|
||||
}
|
||||
|
||||
C10_NOINLINE void f3() {
|
||||
static C10_NOINLINE void f3() {
|
||||
f2();
|
||||
}
|
||||
|
||||
#ifdef FBCODE_CAFFE2
|
||||
TEST(LoggingTest, ExceptionWhat) {
|
||||
std::optional<::c10::Error> error;
|
||||
try {
|
||||
|
@ -16,6 +16,7 @@ class Foo {
|
||||
virtual ~Foo() = default;
|
||||
};
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DECLARE_REGISTRY(FooRegistry, Foo, int);
|
||||
C10_DEFINE_REGISTRY(FooRegistry, Foo, int);
|
||||
#define REGISTER_FOO(clsname) C10_REGISTER_CLASS(FooRegistry, clsname, clsname)
|
||||
@ -48,22 +49,22 @@ TEST(RegistryTest, ReturnNullOnNonExistingCreator) {
|
||||
}
|
||||
|
||||
// C10_REGISTER_CLASS_WITH_PRIORITY defines static variable
|
||||
void RegisterFooDefault() {
|
||||
static void RegisterFooDefault() {
|
||||
C10_REGISTER_CLASS_WITH_PRIORITY(
|
||||
FooRegistry, FooWithPriority, c10::REGISTRY_DEFAULT, Foo);
|
||||
}
|
||||
|
||||
void RegisterFooDefaultAgain() {
|
||||
static void RegisterFooDefaultAgain() {
|
||||
C10_REGISTER_CLASS_WITH_PRIORITY(
|
||||
FooRegistry, FooWithPriority, c10::REGISTRY_DEFAULT, Foo);
|
||||
}
|
||||
|
||||
void RegisterFooBarFallback() {
|
||||
static void RegisterFooBarFallback() {
|
||||
C10_REGISTER_CLASS_WITH_PRIORITY(
|
||||
FooRegistry, FooWithPriority, c10::REGISTRY_FALLBACK, Bar);
|
||||
}
|
||||
|
||||
void RegisterFooBarPreferred() {
|
||||
static void RegisterFooBarPreferred() {
|
||||
C10_REGISTER_CLASS_WITH_PRIORITY(
|
||||
FooRegistry, FooWithPriority, c10::REGISTRY_PREFERRED, Bar);
|
||||
}
|
||||
|
@ -273,8 +273,11 @@ DECLARE_bool(logtostderr);
|
||||
// This backward compatibility flags are in order to deal with cases where
|
||||
// Caffe2 are not built with glog, but some init flags still pass in these
|
||||
// flags. They may go away in the future.
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_int32(minloglevel, 0, "Equivalent to glog minloglevel")
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_int32(v, 0, "Equivalent to glog verbose")
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
C10_DEFINE_bool(logtostderr, false, "Equivalent to glog logtostderr")
|
||||
#endif // !defined(c10_USE_GLOG)
|
||||
|
||||
|
@ -510,7 +510,7 @@ class DeviceCachingAllocator {
|
||||
}
|
||||
};
|
||||
|
||||
void local_raw_delete(void* ptr);
|
||||
static void local_raw_delete(void* ptr);
|
||||
|
||||
class XPUAllocator : public Allocator {
|
||||
private:
|
||||
|
@ -4,10 +4,6 @@
|
||||
#include <c10/xpu/XPUCachingAllocator.h>
|
||||
#include <c10/xpu/XPUException.h>
|
||||
|
||||
bool has_xpu() {
|
||||
return c10::xpu::device_count() > 0;
|
||||
}
|
||||
|
||||
TEST(XPUCachingAllocatorTest, GetXPUAllocator) {
|
||||
auto* allocator = c10::xpu::XPUCachingAllocator::get();
|
||||
|
||||
|
@ -2,7 +2,7 @@
|
||||
|
||||
#include <c10/xpu/XPUFunctions.h>
|
||||
|
||||
bool has_xpu() {
|
||||
static bool has_xpu() {
|
||||
return c10::xpu::device_count() > 0;
|
||||
}
|
||||
|
||||
|
@ -5,7 +5,7 @@
|
||||
#include <c10/xpu/XPUStream.h>
|
||||
#include <c10/xpu/test/impl/XPUTest.h>
|
||||
|
||||
bool has_xpu() {
|
||||
static bool has_xpu() {
|
||||
return c10::xpu::device_count() > 0;
|
||||
}
|
||||
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include <thread>
|
||||
#include <unordered_set>
|
||||
|
||||
bool has_xpu() {
|
||||
static bool has_xpu() {
|
||||
return c10::xpu::device_count() > 0;
|
||||
}
|
||||
|
||||
@ -98,7 +98,7 @@ TEST(XPUStreamTest, StreamBehavior) {
|
||||
EXPECT_NE(stream.device_index(), c10::xpu::current_device());
|
||||
}
|
||||
|
||||
void thread_fun(std::optional<c10::xpu::XPUStream>& cur_thread_stream) {
|
||||
static void thread_fun(std::optional<c10::xpu::XPUStream>& cur_thread_stream) {
|
||||
auto new_stream = c10::xpu::getStreamFromPool();
|
||||
c10::xpu::setCurrentXPUStream(new_stream);
|
||||
cur_thread_stream = {c10::xpu::getCurrentXPUStream()};
|
||||
@ -153,7 +153,11 @@ TEST(XPUStreamTest, StreamPoolRoundRobinTest) {
|
||||
EXPECT_TRUE(result_pair.second);
|
||||
}
|
||||
|
||||
void asyncMemCopy(sycl::queue& queue, int* dst, int* src, size_t numBytes) {
|
||||
static void asyncMemCopy(
|
||||
sycl::queue& queue,
|
||||
int* dst,
|
||||
int* src,
|
||||
size_t numBytes) {
|
||||
queue.memcpy(dst, src, numBytes);
|
||||
}
|
||||
|
||||
|
@ -102,6 +102,7 @@ static void handler_SIGTERM(int sig, siginfo_t* info, void* ctx) {
|
||||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
__attribute__((weak)) void setDataLoaderSignalHandlers() {}
|
||||
|
||||
static PyObject* THPModule_setWorkerSignalHandlers(
|
||||
|
@ -868,7 +868,7 @@ void set_device(int device) {
|
||||
|
||||
// Given an Edge or optional<InputMetdata>, return the InputMetadata
|
||||
template <typename T>
|
||||
const InputMetadata& get_input_metadata(const T& thing);
|
||||
const static InputMetadata& get_input_metadata(const T& thing);
|
||||
|
||||
template <>
|
||||
const InputMetadata& get_input_metadata<std::optional<InputMetadata>>(
|
||||
@ -884,7 +884,7 @@ const InputMetadata& get_input_metadata<Edge>(const Edge& thing) {
|
||||
|
||||
// Given an Edge or optional<InputMetdata>, return if there is an InputMetadata.
|
||||
template <typename T>
|
||||
bool has_input_metadata(const T& thing);
|
||||
static bool has_input_metadata(const T& thing);
|
||||
|
||||
template <>
|
||||
bool has_input_metadata<std::optional<InputMetadata>>(
|
||||
@ -914,7 +914,7 @@ std::vector<std::optional<InputMetadata>> collect_input_metadata(
|
||||
// outputs. This involves using the InputMetadata to check the outputs and also
|
||||
// potentially calling .sum_to on the outputs.
|
||||
template <typename T>
|
||||
void validate_outputs_impl(
|
||||
static void validate_outputs_impl(
|
||||
const std::vector<T>& input_metadata_container,
|
||||
variable_list& grads,
|
||||
const std::function<std::string(const std::string&)>& format_error) {
|
||||
@ -1463,7 +1463,7 @@ Engine& Engine::get_base_engine() {
|
||||
return engine;
|
||||
}
|
||||
|
||||
std::atomic<EngineStub> engine_stub(Engine::get_base_engine);
|
||||
static std::atomic<EngineStub> engine_stub(Engine::get_base_engine);
|
||||
|
||||
void set_default_engine_stub(EngineStub stub) {
|
||||
engine_stub.store(stub);
|
||||
|
@ -5,6 +5,7 @@
|
||||
#include <torch/csrc/autograd/functions/pybind.h>
|
||||
#include <torch/csrc/autograd/functions/tensor.h>
|
||||
#include <torch/csrc/autograd/generated/python_functions.h>
|
||||
#include <torch/csrc/autograd/python_autograd.h>
|
||||
#include <torch/csrc/autograd/python_cpp_function.h>
|
||||
#include <torch/csrc/autograd/python_variable.h>
|
||||
#ifdef USE_DISTRIBUTED
|
||||
@ -73,7 +74,7 @@ template <
|
||||
ValueT ParamsT::*ptr,
|
||||
typename ConvertArgT,
|
||||
PyObject* (*Convert)(ConvertArgT)>
|
||||
PyObject* getTupleAttr(PyObject* obj, void* _unused) {
|
||||
static PyObject* getTupleAttr(PyObject* obj, void* _unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
THPCppFunction* self = (THPCppFunction*)obj;
|
||||
auto& arr = ((T*)(self->cdata.get()))->*ptr;
|
||||
@ -95,7 +96,7 @@ template <
|
||||
ValueT ParamsT::*ptr,
|
||||
typename ConvertArgT,
|
||||
PyObject* (*Convert)(ConvertArgT)>
|
||||
PyObject* getValueAttr(PyObject* obj, void* _unused) {
|
||||
static PyObject* getValueAttr(PyObject* obj, void* _unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
THPCppFunction* self = (THPCppFunction*)obj;
|
||||
auto& val = ((T*)(self->cdata.get()))->*ptr;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include <torch/csrc/autograd/input_metadata.h>
|
||||
#include <torch/csrc/autograd/profiler.h>
|
||||
#include <torch/csrc/autograd/profiler_python.h>
|
||||
#include <torch/csrc/autograd/python_autograd.h>
|
||||
#include <torch/csrc/autograd/python_function.h>
|
||||
#include <torch/csrc/autograd/python_saved_variable_hooks.h>
|
||||
#include <torch/csrc/autograd/python_variable.h>
|
||||
@ -1292,7 +1293,7 @@ static PyObject* len_torch_dispatch_stack(PyObject* _unused, PyObject* args) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPModule_increment_version(
|
||||
static PyObject* THPModule_increment_version(
|
||||
PyObject* _unused,
|
||||
PyObject* tensor_list) {
|
||||
HANDLE_TH_ERRORS
|
||||
|
@ -200,7 +200,7 @@ PyObject* THPCppFunction_sequence_nr(PyObject* self, PyObject* noargs) {
|
||||
return THPUtils_packUInt64(fn.sequence_nr());
|
||||
}
|
||||
|
||||
PyObject* THPCppFunction_set_sequence_nr(
|
||||
static PyObject* THPCppFunction_set_sequence_nr(
|
||||
PyObject* self,
|
||||
PyObject* sequence_nr) {
|
||||
HANDLE_TH_ERRORS
|
||||
@ -278,7 +278,7 @@ struct DefaultFunctionType {
|
||||
PyTypeObject type;
|
||||
};
|
||||
|
||||
PyTypeObject* get_default_type() {
|
||||
static PyTypeObject* get_default_type() {
|
||||
static DefaultFunctionType default_type;
|
||||
return &(default_type.type);
|
||||
}
|
||||
@ -339,7 +339,7 @@ bool THPCppFunction_Check(PyObject* obj) {
|
||||
}
|
||||
}
|
||||
|
||||
PyObject* callRegisterFn(PyObject* dict, PyObject* hook) {
|
||||
static PyObject* callRegisterFn(PyObject* dict, PyObject* hook) {
|
||||
THPObjectPtr register_fn(
|
||||
PyObject_GetAttrString(THPFunctionClass, "_register_hook"));
|
||||
if (!register_fn) {
|
||||
|
@ -160,8 +160,6 @@ c10::intrusive_ptr<at::ivalue::Future> PythonEngine::execute_with_graph_task(
|
||||
}
|
||||
} // namespace torch::autograd::python
|
||||
|
||||
PyObject* THPEngineClass = nullptr;
|
||||
|
||||
static Edge parseGradientEdge(PyObject* obj, int64_t index) {
|
||||
PyObject* grad_fn = PyTuple_GetItem(obj, 0);
|
||||
auto output_nr = THPUtils_unpackLong(PyTuple_GetItem(obj, 1));
|
||||
@ -181,7 +179,7 @@ static Edge parseGradientEdge(PyObject* obj, int64_t index) {
|
||||
}
|
||||
|
||||
// Implementation of torch._C._EngineBase.run_backward
|
||||
PyObject* THPEngine_run_backward(
|
||||
static PyObject* THPEngine_run_backward(
|
||||
PyObject* self,
|
||||
PyObject* args,
|
||||
PyObject* kwargs) {
|
||||
@ -396,7 +394,7 @@ PyObject* THPEngine_run_backward(
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPEngine_queue_callback(PyObject* self, PyObject* _callback) {
|
||||
static PyObject* THPEngine_queue_callback(PyObject* self, PyObject* _callback) {
|
||||
HANDLE_TH_ERRORS
|
||||
auto& engine = python::PythonEngine::get_python_engine();
|
||||
std::shared_ptr<PyObject> callback(_callback, [](PyObject* obj) {
|
||||
@ -431,7 +429,9 @@ PyObject* THPEngine_queue_callback(PyObject* self, PyObject* _callback) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPEngine_is_checkpoint_valid(PyObject* self, PyObject* noargs) {
|
||||
static PyObject* THPEngine_is_checkpoint_valid(
|
||||
PyObject* self,
|
||||
PyObject* noargs) {
|
||||
HANDLE_TH_ERRORS
|
||||
auto& engine = python::PythonEngine::get_python_engine();
|
||||
if (engine.is_checkpoint_valid()) {
|
||||
@ -442,7 +442,10 @@ PyObject* THPEngine_is_checkpoint_valid(PyObject* self, PyObject* noargs) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPEngine_new(PyTypeObject* type, PyObject* args, PyObject* kwargs) {
|
||||
static PyObject* THPEngine_new(
|
||||
PyTypeObject* type,
|
||||
PyObject* args,
|
||||
PyObject* kwargs) {
|
||||
return type->tp_alloc(type, 0);
|
||||
}
|
||||
|
||||
@ -459,7 +462,7 @@ static struct PyMethodDef THPEngine_methods[] = {
|
||||
nullptr},
|
||||
{nullptr}};
|
||||
|
||||
PyTypeObject THPEngineType = {
|
||||
static PyTypeObject THPEngineType = {
|
||||
PyVarObject_HEAD_INIT(nullptr, 0)
|
||||
"torch._C._EngineBase", /* tp_name */
|
||||
sizeof(THPEngine), /* tp_basicsize */
|
||||
|
@ -108,7 +108,7 @@ static PyObject* THPVariable_pynew(
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyTypeObject THPLegacyVariableType = {
|
||||
static PyTypeObject THPLegacyVariableType = {
|
||||
PyVarObject_HEAD_INIT(nullptr, 0)
|
||||
"torch._C._LegacyVariableBase", /* tp_name */
|
||||
0, /* tp_basicsize */
|
||||
|
@ -1,3 +1,4 @@
|
||||
#include <torch/csrc/autograd/python_nested_functions.h>
|
||||
#include <torch/csrc/utils/nested.h>
|
||||
#include <torch/csrc/utils/pycfunction_helpers.h>
|
||||
#include <torch/csrc/utils/python_arg_parser.h>
|
||||
|
@ -20,6 +20,6 @@ inline PyObject* TypeError_to_NotImplemented_(
|
||||
return ret;
|
||||
}
|
||||
|
||||
void initTorchFunctions();
|
||||
void initTorchFunctions(PyObject* module);
|
||||
|
||||
} // namespace torch::autograd
|
||||
|
@ -46,7 +46,7 @@ namespace torch::autograd {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
|
||||
PyObject* THPVariableFunctionsModule = nullptr;
|
||||
|
||||
inline Tensor dispatch_range(
|
||||
inline static Tensor dispatch_range(
|
||||
const Scalar& start,
|
||||
const Scalar& end,
|
||||
const Scalar& step,
|
||||
@ -56,7 +56,7 @@ inline Tensor dispatch_range(
|
||||
return at::range_out(result, start, end, step);
|
||||
}
|
||||
|
||||
inline Tensor dispatch_range(
|
||||
inline static Tensor dispatch_range(
|
||||
const Scalar& start,
|
||||
const Scalar& end,
|
||||
const Scalar& step,
|
||||
@ -486,11 +486,14 @@ static PyObject* THPVariable_numel(
|
||||
}
|
||||
|
||||
// Sharded function definitions
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
void gatherTorchFunctions_0(std::vector<PyMethodDef>& torch_functions);
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
void gatherTorchFunctions_1(std::vector<PyMethodDef>& torch_functions);
|
||||
// NOLINTNEXTLINE(misc-use-internal-linkage)
|
||||
void gatherTorchFunctions_2(std::vector<PyMethodDef>& torch_functions);
|
||||
|
||||
void gatherTorchFunctions(std::vector<PyMethodDef>& torch_functions) {
|
||||
static void gatherTorchFunctions(std::vector<PyMethodDef>& torch_functions) {
|
||||
constexpr size_t num_functions =
|
||||
sizeof(torch_functions_manual) / sizeof(torch_functions_manual[0]);
|
||||
torch_functions.assign(
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <torch/csrc/autograd/function.h>
|
||||
#include <torch/csrc/autograd/python_cpp_function.h>
|
||||
#include <torch/csrc/autograd/python_hook.h>
|
||||
#include <torch/csrc/autograd/python_torch_functions.h>
|
||||
#include <torch/csrc/autograd/python_variable_indexing.h>
|
||||
#include <torch/csrc/autograd/utils/error_messages.h>
|
||||
#include <torch/csrc/autograd/utils/wrap_outputs.h>
|
||||
@ -317,7 +318,7 @@ PyObject* THPVariable_Wrap(const at::TensorBase& var) {
|
||||
return THPVariable_NewWithVar((PyTypeObject*)THPVariableClass, var, status);
|
||||
}
|
||||
|
||||
bool isResurrectable(THPVariable* self) {
|
||||
static bool isResurrectable(THPVariable* self) {
|
||||
// We want to divide this check into 2 cases.
|
||||
|
||||
// 1. C++ owns PyObject (in this case, self->cdata.unsafeIsBorrowed() is
|
||||
@ -406,19 +407,19 @@ static bool THPVariable_tryResurrect(THPVariable* self) {
|
||||
return true;
|
||||
}
|
||||
|
||||
int THPFake_traverse(THPVariable* self, visitproc visit, void* arg) {
|
||||
static int THPFake_traverse(THPVariable* self, visitproc visit, void* arg) {
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
false, "TensorBase tp_traverse function was not overriden properly");
|
||||
return 0;
|
||||
}
|
||||
|
||||
int THPFake_clear(THPVariable* self) {
|
||||
static int THPFake_clear(THPVariable* self) {
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
false, "TensorBase tp_clear function was not overriden properly");
|
||||
return 0;
|
||||
}
|
||||
|
||||
PyObject* THPVariable_pynew(
|
||||
static PyObject* THPVariable_pynew(
|
||||
PyTypeObject* type,
|
||||
PyObject* args,
|
||||
PyObject* kwargs);
|
||||
@ -799,7 +800,9 @@ static PyObject* THPVariable_make_wrapper_subclass(
|
||||
using getter = PyObject* (*)(PyObject*, void*);
|
||||
using setter = int (*)(PyObject*, PyObject*, void*);
|
||||
|
||||
PyObject* THPVariable_get_python_dispatch(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_python_dispatch(
|
||||
THPVariable* self,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
const auto& var = THPVariable_Unpack(self);
|
||||
return torch::autograd::utils::wrap(
|
||||
@ -881,7 +884,7 @@ struct PropertyImag : GetterBase<PropertyImag> {
|
||||
}
|
||||
};
|
||||
|
||||
PyObject* THPVariable_get_cdata(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_cdata(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "_cdata");
|
||||
@ -891,7 +894,7 @@ PyObject* THPVariable_get_cdata(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_version(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_version(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "_version");
|
||||
@ -901,7 +904,7 @@ PyObject* THPVariable_get_version(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_grad_fn(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_grad_fn(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "grad_fn");
|
||||
@ -938,7 +941,10 @@ static PyObject* THPVariable_is_leaf(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
int THPVariable_set_data(THPVariable* self, PyObject* data, void* unused) {
|
||||
static int THPVariable_set_data(
|
||||
THPVariable* self,
|
||||
PyObject* data,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_setter(self, "data", data);
|
||||
@ -955,7 +961,10 @@ int THPVariable_set_data(THPVariable* self, PyObject* data, void* unused) {
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
int THPVariable_set_grad(THPVariable* self, PyObject* py_grad, void* unused) {
|
||||
static int THPVariable_set_grad(
|
||||
THPVariable* self,
|
||||
PyObject* py_grad,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_setter(self, "grad", py_grad);
|
||||
@ -1013,7 +1022,7 @@ int THPVariable_set_grad(THPVariable* self, PyObject* py_grad, void* unused) {
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_volatile(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_volatile(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "volatile");
|
||||
@ -1026,7 +1035,10 @@ PyObject* THPVariable_get_volatile(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
int THPVariable_set_volatile(THPVariable* self, PyObject* obj, void* unused) {
|
||||
static int THPVariable_set_volatile(
|
||||
THPVariable* self,
|
||||
PyObject* obj,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_setter(self, "volatile", obj);
|
||||
@ -1038,7 +1050,7 @@ int THPVariable_set_volatile(THPVariable* self, PyObject* obj, void* unused) {
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_output_nr(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_output_nr(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "output_nr");
|
||||
@ -1049,7 +1061,9 @@ PyObject* THPVariable_get_output_nr(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_requires_grad(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_requires_grad(
|
||||
THPVariable* self,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "requires_grad");
|
||||
@ -1062,7 +1076,7 @@ PyObject* THPVariable_get_requires_grad(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_retains_grad(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_retains_grad(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "retains_grad");
|
||||
@ -1075,7 +1089,7 @@ PyObject* THPVariable_retains_grad(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_ndim(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_ndim(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "ndim");
|
||||
@ -1084,7 +1098,7 @@ PyObject* THPVariable_get_ndim(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_names(PyObject* self, void* unused) {
|
||||
static PyObject* THPVariable_get_names(PyObject* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function(self)) {
|
||||
return handle_torch_function_getter((THPVariable*)self, "names");
|
||||
@ -1122,7 +1136,10 @@ PyObject* THPVariable_get_names(PyObject* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
int THPVariable_set_names(PyObject* self, PyObject* names, void* unused) {
|
||||
static int THPVariable_set_names(
|
||||
PyObject* self,
|
||||
PyObject* names,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function(self)) {
|
||||
return handle_torch_function_setter((THPVariable*)self, "names", names);
|
||||
@ -1140,7 +1157,7 @@ int THPVariable_set_names(PyObject* self, PyObject* names, void* unused) {
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
int THPVariable_set_requires_grad(
|
||||
static int THPVariable_set_requires_grad(
|
||||
THPVariable* self,
|
||||
PyObject* obj,
|
||||
void* unused) {
|
||||
@ -1167,7 +1184,7 @@ int THPVariable_set_requires_grad(
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_name(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_name(THPVariable* self, void* unused) {
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
HANDLE_TH_ERRORS
|
||||
return handle_torch_function_getter(self, "name");
|
||||
@ -1179,7 +1196,9 @@ PyObject* THPVariable_get_name(THPVariable* self, void* unused) {
|
||||
return THPUtils_packString(tensor.name().c_str());
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_backwards_hooks(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_backwards_hooks(
|
||||
THPVariable* self,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "_backward_hooks");
|
||||
@ -1192,7 +1211,7 @@ PyObject* THPVariable_get_backwards_hooks(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
int THPVariable_set_backwards_hooks(
|
||||
static int THPVariable_set_backwards_hooks(
|
||||
THPVariable* self,
|
||||
PyObject* obj,
|
||||
void* unused) {
|
||||
@ -1217,7 +1236,7 @@ int THPVariable_set_backwards_hooks(
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_post_accumulate_grad_hooks(
|
||||
static PyObject* THPVariable_get_post_accumulate_grad_hooks(
|
||||
THPVariable* self,
|
||||
void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
@ -1232,7 +1251,7 @@ PyObject* THPVariable_get_post_accumulate_grad_hooks(
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
int THPVariable_set_post_accumulate_grad_hooks(
|
||||
static int THPVariable_set_post_accumulate_grad_hooks(
|
||||
THPVariable* self,
|
||||
PyObject* obj,
|
||||
void* unused) {
|
||||
@ -1257,7 +1276,7 @@ int THPVariable_set_post_accumulate_grad_hooks(
|
||||
END_HANDLE_TH_ERRORS_RET(-1)
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_base(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_base(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "_base");
|
||||
@ -1270,7 +1289,7 @@ PyObject* THPVariable_get_base(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_get_shape(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_get_shape(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "shape");
|
||||
@ -1279,7 +1298,7 @@ PyObject* THPVariable_get_shape(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_cpu(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_cpu(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_cpu");
|
||||
@ -1289,7 +1308,7 @@ PyObject* THPVariable_is_cpu(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_cuda(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_cuda(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_cuda");
|
||||
@ -1299,7 +1318,7 @@ PyObject* THPVariable_is_cuda(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_mtia(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_mtia(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_mtia");
|
||||
@ -1309,7 +1328,7 @@ PyObject* THPVariable_is_mtia(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_xla(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_xla(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_xla");
|
||||
@ -1319,7 +1338,7 @@ PyObject* THPVariable_is_xla(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_ipu(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_ipu(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_ipu");
|
||||
@ -1329,7 +1348,7 @@ PyObject* THPVariable_is_ipu(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_xpu(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_xpu(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_xpu");
|
||||
@ -1339,7 +1358,7 @@ PyObject* THPVariable_is_xpu(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_sparse(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_sparse(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_sparse");
|
||||
@ -1349,7 +1368,7 @@ PyObject* THPVariable_is_sparse(THPVariable* self, void* unused) {
|
||||
END_HANDLE_TH_ERRORS
|
||||
}
|
||||
|
||||
PyObject* THPVariable_is_sparse_csr(THPVariable* self, void* unused) {
|
||||
static PyObject* THPVariable_is_sparse_csr(THPVariable* self, void* unused) {
|
||||
HANDLE_TH_ERRORS
|
||||
if (check_has_torch_function((PyObject*)self)) {
|
||||
return handle_torch_function_getter(self, "is_sparse_csr");
|
||||
@ -1359,7 +1378,7 @@ PyObject* THPVariable_is_sparse_csr(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_mkldnn(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_mkldnn(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_mkldnn");
@ -1369,7 +1388,7 @@ PyObject* THPVariable_is_mkldnn(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_mps(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_mps(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_mps");
@ -1379,7 +1398,7 @@ PyObject* THPVariable_is_mps(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_maia(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_maia(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_maia");
@ -1389,7 +1408,7 @@ PyObject* THPVariable_is_maia(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_vulkan(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_vulkan(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_vulkan");
@ -1399,7 +1418,7 @@ PyObject* THPVariable_is_vulkan(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_quantized(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_quantized(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_quantized");
@ -1409,7 +1428,7 @@ PyObject* THPVariable_is_quantized(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_meta(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_meta(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_meta");
@ -1419,7 +1438,7 @@ PyObject* THPVariable_is_meta(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_complex(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_complex(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_complex");
@ -1429,7 +1448,7 @@ PyObject* THPVariable_is_complex(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_is_nested(THPVariable* self, void* unused) {
static PyObject* THPVariable_is_nested(THPVariable* self, void* unused) {
HANDLE_TH_ERRORS
if (check_has_torch_function((PyObject*)self)) {
return handle_torch_function_getter(self, "is_nested");
@ -1439,7 +1458,7 @@ PyObject* THPVariable_is_nested(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

PyObject* THPVariable_has_symbolic_sizes_strides(
static PyObject* THPVariable_has_symbolic_sizes_strides(
THPVariable* self,
void* unused) {
HANDLE_TH_ERRORS
@ -1496,7 +1515,7 @@ static PyObject* THPVariable_get_itemsize(THPVariable* self, void* unused) {
END_HANDLE_TH_ERRORS
}

int THPVariable_set_real(PyObject* self, PyObject* real, void* unused) {
static int THPVariable_set_real(PyObject* self, PyObject* real, void* unused) {
HANDLE_TH_ERRORS
auto& self_ = THPVariable_Unpack(self);
auto self_real = at::real(self_);
@ -1509,7 +1528,7 @@ int THPVariable_set_real(PyObject* self, PyObject* real, void* unused) {
END_HANDLE_TH_ERRORS_RET(-1)
}

int THPVariable_set_imag(PyObject* self, PyObject* imag, void* unused) {
static int THPVariable_set_imag(PyObject* self, PyObject* imag, void* unused) {
HANDLE_TH_ERRORS
auto& self_ = THPVariable_Unpack(self);
auto self_imag = at::imag(self_);
@ -1522,7 +1541,7 @@ int THPVariable_set_imag(PyObject* self, PyObject* imag, void* unused) {
END_HANDLE_TH_ERRORS_RET(-1)
}

PyObject* THPVariable__use_count(PyObject* self, PyObject* noargs) {
static PyObject* THPVariable__use_count(PyObject* self, PyObject* noargs) {
HANDLE_TH_ERRORS
const auto& t = THPVariable_Unpack(self);
return THPUtils_packUInt64(t.use_count());
@ -1687,9 +1706,12 @@ struct THPVariableMeta {
PyHeapTypeObject base;
};

int THPVariableMetaType_init(PyObject* cls, PyObject* args, PyObject* kwargs);
static int THPVariableMetaType_init(
PyObject* cls,
PyObject* args,
PyObject* kwargs);

PyTypeObject THPVariableMetaType = {
static PyTypeObject THPVariableMetaType = {
PyVarObject_HEAD_INIT(DEFERRED_ADDRESS(&PyType_Type), 0)
"torch._C._TensorMeta", /* tp_name */
sizeof(THPVariableMeta), /* tp_basicsize */
@ -1731,7 +1753,7 @@ PyTypeObject THPVariableMetaType = {
nullptr, /* tp_new */
};

PyTypeObject THPVariableType = {
static PyTypeObject THPVariableType = {
PyVarObject_HEAD_INIT(&THPVariableMetaType, 0)
"torch._C.TensorBase", /* tp_name */
sizeof(THPVariable), /* tp_basicsize */
@ -1928,7 +1950,7 @@ static int THPVariable_subclass_clear(THPVariable* self) {
// NB: this is not the tp_dealloc on THPVariable; instead, its the dealloc
// on subclasses. It's never valid to construct a THPVariable so it's not
// necessary to implement the dealloc for that case
void THPVariable_subclass_dealloc(PyObject* self) {
static void THPVariable_subclass_dealloc(PyObject* self) {
if (THPVariable_tryResurrect((THPVariable*)self))
return;

@ -2375,9 +2397,8 @@ namespace torch::autograd {

// NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays,cppcoreguidelines-avoid-non-const-global-variables)
extern PyMethodDef variable_methods[];
extern void initTorchFunctions(PyObject* module);

void initTensorImplConversion(PyObject* module) {
static void initTensorImplConversion(PyObject* module) {
auto m = py::handle(module).cast<py::module>();
m.def("_wrap_tensor_impl", [](void* ptr) {
auto p = c10::intrusive_ptr<c10::TensorImpl, at::UndefinedTensorImpl>::

@ -75,7 +75,7 @@ static void record_function_exit_new(
}

template <typename Func>
c10::intrusive_ptr<c10::ivalue::Future> _call_end_callbacks_on_fut(
static c10::intrusive_ptr<c10::ivalue::Future> _call_end_callbacks_on_fut(
Func get_record,
const c10::intrusive_ptr<c10::ivalue::Future>& fut) {
// Profiling callback that ends the associated record_function

@ -7,6 +7,7 @@
#include <torch/csrc/dynamo/framelocals_mapping.h>
#include <torch/csrc/utils/python_compat.h>

// NOLINTNEXTLINE(misc-use-internal-linkage)
const char* cache_lookup_profiler_str = "TorchDynamo Cache Lookup";

// Remember to update the type signature for DynamoCallbackFn.__call__ in

@ -1,3 +1,4 @@
#include <torch/csrc/export/pybind.h>
#include <torch/csrc/utils/generated_serialization_types.h>
#include <torch/csrc/utils/pybind.h>

@ -6,6 +6,7 @@

#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/WrapDimUtils.h>
#include <torch/csrc/functorch/init.h>
#include <torch/csrc/utils/python_raii.h>
#include <torch/python.h>

@ -35,17 +36,20 @@ static bool has_level(const Tensor& self, int64_t level) {
return batched->level() >= level;
}

Tensor _add_batch_dim(const Tensor& self, int64_t batch_dim, int64_t level) {
static Tensor _add_batch_dim(
const Tensor& self,
int64_t batch_dim,
int64_t level) {
return addBatchDim(self, batch_dim, level);
}

Tensor _wrap_functional_tensor(const Tensor& self, int64_t level) {
static Tensor _wrap_functional_tensor(const Tensor& self, int64_t level) {
auto t = at::functionalization::impl::to_functional_tensor(self);
at::functionalization::impl::unsafeGetFunctionalWrapper(t)->set_level(level);
return t;
}

void _assert_wrapped_functional(
static void _assert_wrapped_functional(
const Tensor& unwrapped,
const Tensor& wrapped) {
TORCH_INTERNAL_ASSERT(
@ -59,7 +63,7 @@ void _assert_wrapped_functional(
unwrapped.unsafeGetTensorImpl() == wrapped_inner.unsafeGetTensorImpl())
}

void _propagate_functional_input_mutation(
static void _propagate_functional_input_mutation(
const Tensor& unwrapped,
const Tensor& wrapped) {
TORCH_INTERNAL_ASSERT(
@ -139,7 +143,7 @@ static Tensor _movedim(const Tensor& self, int64_t src, int64_t dst) {
//
// `out_dim` controls where we should put the batch dimension in the output
// tensor.
Tensor _remove_batch_dim(
static Tensor _remove_batch_dim(
const Tensor& self,
int64_t level,
const c10::SymInt& batch_size,
@ -166,7 +170,9 @@ Tensor _remove_batch_dim(
return result;
}

Tensor _unwrap_functional_tensor(const Tensor& self, bool add_back_views) {
static Tensor _unwrap_functional_tensor(
const Tensor& self,
bool add_back_views) {
// We only ever call that after popping out of a functionalize() call, in
// which case the current tensors should always be wrapped in a
// FunctionalTensorWrapper.
@ -187,7 +193,7 @@ Tensor _unwrap_functional_tensor(const Tensor& self, bool add_back_views) {
return functional->value();
}

Tensor _wrap_for_grad(const Tensor& self, int64_t level) {
static Tensor _wrap_for_grad(const Tensor& self, int64_t level) {
// NB: different behavior inside??
// return self;
// TORCH_INTERNAL_ASSERT(!maybeGetTensorWrapper(self));
@ -195,7 +201,7 @@ Tensor _wrap_for_grad(const Tensor& self, int64_t level) {
return makeTensorWrapper(self, level);
}

Tensor _unwrap_for_grad(const Tensor& self, int64_t level) {
static Tensor _unwrap_for_grad(const Tensor& self, int64_t level) {
auto* result = maybeGetTensorWrapper(self);
if (!result) {
return self;
@ -207,7 +213,7 @@ Tensor _unwrap_for_grad(const Tensor& self, int64_t level) {
return self;
}

int64_t dlevel(const Tensor& tensor) {
static int64_t dlevel(const Tensor& tensor) {
auto* wrapped = maybeGetTensorWrapper(tensor);
if (!wrapped) {
return 0;
@ -219,12 +225,12 @@ int64_t dlevel(const Tensor& tensor) {
return wrapped->level().value();
}

bool dump_tensor(const Tensor& self) {
static bool dump_tensor(const Tensor& self) {
dumpTensorCout(self);
return true;
}

RandomnessType get_randomness_enum(const std::string& randomness) {
static RandomnessType get_randomness_enum(const std::string& randomness) {
if (randomness == "error") {
return RandomnessType::Error;
} else if (randomness == "same") {
@ -237,20 +243,20 @@ RandomnessType get_randomness_enum(const std::string& randomness) {
}
}

int64_t _grad_increment_nesting() {
static int64_t _grad_increment_nesting() {
// See NOTE [grad and vjp interaction with no_grad]
bool prev_grad_mode = c10::GradMode::is_enabled();
return initAndPushDynamicLayer(
TransformType::Grad, std::nullopt, std::nullopt, prev_grad_mode);
}

int64_t _grad_decrement_nesting() {
static int64_t _grad_decrement_nesting() {
auto layer = popDynamicLayerAndDeleteMetadata();
TORCH_INTERNAL_ASSERT(layer.key() == TransformType::Grad);
return layer.layerId();
}

int64_t _jvp_increment_nesting() {
static int64_t _jvp_increment_nesting() {
// See NOTE [grad and vjp interaction with no_grad]
bool prev_fwd_grad_mode =
c10::AutogradState::get_tls_state().get_fw_grad_mode();
@ -262,13 +268,13 @@ int64_t _jvp_increment_nesting() {
prev_fwd_grad_mode);
}

int64_t _jvp_decrement_nesting() {
static int64_t _jvp_decrement_nesting() {
auto layer = popDynamicLayerAndDeleteMetadata();
TORCH_INTERNAL_ASSERT(layer.key() == TransformType::Jvp);
return layer.layerId();
}

int64_t _vmap_increment_nesting(
static int64_t _vmap_increment_nesting(
c10::SymInt batch_size,
const std::string& randomness) {
return initAndPushDynamicLayer(
@ -277,13 +283,13 @@ int64_t _vmap_increment_nesting(
get_randomness_enum(randomness));
}

int64_t _vmap_decrement_nesting() {
static int64_t _vmap_decrement_nesting() {
auto layer = popDynamicLayerAndDeleteMetadata();
TORCH_INTERNAL_ASSERT(layer.key() == TransformType::Vmap);
return layer.layerId();
}

int64_t _func_increment_nesting(bool reapply_views) {
static int64_t _func_increment_nesting(bool reapply_views) {
return initAndPushDynamicLayer(
TransformType::Functionalize,
std::nullopt,
@ -293,7 +299,7 @@ int64_t _func_increment_nesting(bool reapply_views) {
/*functionalize_add_back_views=*/reapply_views);
}

int64_t _func_decrement_nesting() {
static int64_t _func_decrement_nesting() {
auto layer = popDynamicLayerAndDeleteMetadata();
TORCH_INTERNAL_ASSERT(layer.key() == TransformType::Functionalize);
return layer.layerId();

@ -612,7 +612,7 @@ static int NodeIter_init_fn(NodeIter* self, PyObject* args, PyObject* kwargs) {
}

template <bool reversed>
PyObject* NodeIter_iternext_helper(NodeIter* self) {
static PyObject* NodeIter_iternext_helper(NodeIter* self) {
// It should be possible to relax the ref counting here
// but in practice, we do not have that many _erased Nodes,
// so probably not worth it.
@ -644,7 +644,7 @@ PyObject* NodeIter_iternext_helper(NodeIter* self) {
return nullptr;
}

PyObject* NodeIter_iternext(PyObject* _self) {
static PyObject* NodeIter_iternext(PyObject* _self) {
NodeIter* self = (NodeIter*)_self;
if (self->_reversed) {
return NodeIter_iternext_helper<true>(self);

@ -17,6 +17,7 @@

namespace torch::instruction_counter {

// NOLINTNEXTLINE(misc-use-internal-linkage)
long start() {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");
@ -48,6 +49,7 @@ long start() {
#endif
}

// NOLINTNEXTLINE(misc-use-internal-linkage)
uint64_t end(int fd) {
#if !defined(__linux__)
throw std::runtime_error("This systems seems not to be Linux");

@ -2,6 +2,7 @@
#include <torch/csrc/utils/pybind.h>

namespace torch::profiler {
// NOLINTNEXTLINE(misc-use-internal-linkage)
void initIttBindings(PyObject* module) {
auto m = py::handle(module).cast<py::module>();

@ -3,7 +3,7 @@
#include <torch/csrc/profiler/stubs/base.h>

namespace torch::profiler {
__itt_domain* _itt_domain = __itt_domain_create("PyTorch");
static __itt_domain* _itt_domain = __itt_domain_create("PyTorch");

bool itt_is_available() {
return torch::profiler::impl::ittStubs()->enabled();

@ -27,7 +27,8 @@ namespace torch::lazy {
// be simplified but it should probably be done together with
// designing/refactoring the overall approach to get/set of default eager/lazy
// device types
torch::lazy::BackendDevice GetDeviceOrCurrent(const std::string& device_str) {
static torch::lazy::BackendDevice GetDeviceOrCurrent(
const std::string& device_str) {
if (device_str.empty()) {
getBackend()->GetDefaultDeviceType();
return torch::lazy::BackendDevice();
@ -35,12 +36,12 @@ torch::lazy::BackendDevice GetDeviceOrCurrent(const std::string& device_str) {
return torch::lazy::atenDeviceToBackendDevice(c10::Device(device_str));
}

std::ptrdiff_t GetTensorId(const at::Tensor& tensor) {
static std::ptrdiff_t GetTensorId(const at::Tensor& tensor) {
torch::lazy::LazyTensorPtr lazy_tensor = torch::lazy::TryGetLtcTensor(tensor);
return lazy_tensor->GetUniqueId();
}

std::string GetTensorsDump(
static std::string GetTensorsDump(
const std::vector<at::Tensor>& tensors,
const std::function<std::string(c10::ArrayRef<const torch::lazy::Node*>)>&
coverter) {
@ -56,7 +57,7 @@ std::string GetTensorsDump(
return coverter(nodes);
}

std::vector<torch::lazy::LazyTensorPtr> GetLtcTensors(
static std::vector<torch::lazy::LazyTensorPtr> GetLtcTensors(
const std::vector<at::Tensor>& tensors,
bool want_all) {
std::vector<torch::lazy::LazyTensorPtr> lazy_tensors;
@ -76,14 +77,15 @@ std::vector<torch::lazy::LazyTensorPtr> GetLtcTensors(
return lazy_tensors;
}

std::string GetTensorsBackendGraph(const std::vector<at::Tensor>& tensors) {
static std::string GetTensorsBackendGraph(
const std::vector<at::Tensor>& tensors) {
std::vector<torch::lazy::LazyTensorPtr> lazy_tensors =
GetLtcTensors(tensors, /*want_all=*/false);
return torch::lazy::LazyGraphExecutor::Get()->DumpBackendComputation(
lazy_tensors);
}

void SyncTensors(
static void SyncTensors(
const std::vector<at::Tensor>& tensors,
const std::vector<std::string>& devices,
bool wait,

@ -14,6 +14,7 @@

#include <torch/csrc/monitor/counters.h>
#include <torch/csrc/monitor/events.h>
#include <torch/csrc/monitor/python_init.h>

namespace pybind11::detail {
template <>

@ -4,6 +4,7 @@
#include <pybind11/pytypes.h>
#include <torch/csrc/Generator.h>
#include <torch/csrc/THP.h>
#include <torch/csrc/mps/Module.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/python_numbers.h>

@ -3,6 +3,7 @@
#include <c10/core/Stream.h>
#include <torch/csrc/Generator.h>
#include <torch/csrc/Stream.h>
#include <torch/csrc/mtia/Module.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/device_lazy_init.h>
#include <torch/csrc/utils/pybind.h>

@ -1,5 +1,6 @@
#include <c10/util/thread_name.h>
#include <torch/csrc/Exceptions.h>
#include <torch/csrc/multiprocessing/init.h>
#include <torch/csrc/python_headers.h>
#include <torch/csrc/utils/object_ptr.h>
#include <torch/csrc/utils/pybind.h>

@ -17,7 +17,7 @@ namespace torch::profiler::impl::linux_perf {
/*
* Syscall wrapper for perf_event_open(2)
*/
inline long perf_event_open(
inline static long perf_event_open(
struct perf_event_attr* hw_event,
pid_t pid,
int cpu,

@ -37,7 +37,7 @@ static void THPCapturedTraceback_dealloc(PyObject* self_) {
PyObject_GC_Del(self);
}

PyTypeObject THPCapturedTracebackType = {
static PyTypeObject THPCapturedTracebackType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C._profiler.CapturedTraceback", /* tp_name */
sizeof(THPCapturedTraceback), /* tp_basicsize */

@ -32,7 +32,8 @@ struct ITTThreadLocalState : ProfilerStateBase {
};

template <bool report_input_shapes>
std::unique_ptr<at::ObserverContext> enterITT(const at::RecordFunction& fn) {
static std::unique_ptr<at::ObserverContext> enterITT(
const at::RecordFunction& fn) {
if (ITTThreadLocalState::getTLS() != nullptr) {
torch::profiler::impl::ittStubs()->rangePush(fn.name());
}

@ -124,7 +124,8 @@ static void updateOutputTensorTracker(const at::RecordFunction& fn) {
}

template <bool report_input_shapes>
std::unique_ptr<at::ObserverContext> enterNVTX(const at::RecordFunction& fn) {
static std::unique_ptr<at::ObserverContext> enterNVTX(
const at::RecordFunction& fn) {
if (NVTXThreadLocalState::getTLS() != nullptr) {
auto input_op_ids = getInputTensorOpIds(fn);
torch::profiler::impl::cudaStubs()->rangePush(

@ -8,10 +8,10 @@
#include <torch/csrc/serialization.h>

template <class io>
Py_ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);
static Py_ssize_t doPartialRead(io fildes, void* buf, size_t nbytes);

template <class io>
Py_ssize_t doPartialWrite(io fildes, void* buf, size_t nbytes);
static Py_ssize_t doPartialWrite(io fildes, void* buf, size_t nbytes);

static Py_ssize_t doPartialPythonReadBuffered(
PyObject* fildes,

@ -7,8 +7,8 @@
#include <ATen/PythonTorchFunctionTLS.h>

namespace torch {
PyObject* disabled_torch_function = nullptr;
PyObject* disabled_torch_dispatch = nullptr;
static PyObject* disabled_torch_function = nullptr;
static PyObject* disabled_torch_dispatch = nullptr;

bool torch_function_enabled() {
return at::impl::PythonTorchFunctionTLS::get_disabled_state() ==
@ -38,7 +38,7 @@ typedef struct {
at::impl::TorchFunctionDisabledState old_state;
} DisableTorchFunctionSubclass;

PyObject* DisableTorchFunctionSubclass__enter(
static PyObject* DisableTorchFunctionSubclass__enter(
PyObject* self,
PyObject* unused) {
const auto old_state = at::impl::PythonTorchFunctionTLS::get_disabled_state();
@ -50,7 +50,9 @@ PyObject* DisableTorchFunctionSubclass__enter(
Py_RETURN_NONE;
}

PyObject* DisableTorchFunctionSubclass__exit(PyObject* self, PyObject* unused) {
static PyObject* DisableTorchFunctionSubclass__exit(
PyObject* self,
PyObject* unused) {
at::impl::PythonTorchFunctionTLS::set_disabled_state(
((DisableTorchFunctionSubclass*)self)->old_state);
Py_RETURN_NONE;
@ -79,7 +81,7 @@ static PyMethodDef DisableTorchFunctionSubclass_methods[] = { // NOLINT
{"__exit__", DisableTorchFunctionSubclass__exit, METH_VARARGS, nullptr},
{nullptr, nullptr, 0, nullptr}};

PyTypeObject DisableTorchFunctionSubclassType = {
static PyTypeObject DisableTorchFunctionSubclassType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C.DisableTorchFunctionSubclass", /* tp_name */
sizeof(DisableTorchFunctionSubclass), /* tp_basicsize */
@ -134,7 +136,7 @@ typedef struct {
at::impl::TorchFunctionDisabledState old_state;
} DisableTorchFunction;

PyObject* DisableTorchFunction__enter(PyObject* self, PyObject* unused) {
static PyObject* DisableTorchFunction__enter(PyObject* self, PyObject* unused) {
((DisableTorchFunctionSubclass*)self)->old_state =
at::impl::PythonTorchFunctionTLS::get_disabled_state();
at::impl::PythonTorchFunctionTLS::set_disabled_state(
@ -142,7 +144,7 @@ PyObject* DisableTorchFunction__enter(PyObject* self, PyObject* unused) {
Py_RETURN_NONE;
}

PyObject* DisableTorchFunction__exit(PyObject* self, PyObject* unused) {
static PyObject* DisableTorchFunction__exit(PyObject* self, PyObject* unused) {
at::impl::PythonTorchFunctionTLS::set_disabled_state(
((DisableTorchFunctionSubclass*)self)->old_state);
Py_RETURN_NONE;
@ -153,7 +155,7 @@ static PyMethodDef DisableTorchFunction_methods[] = { // NOLINT
{"__exit__", DisableTorchFunction__exit, METH_VARARGS, nullptr},
{nullptr, nullptr, 0, nullptr}};

PyTypeObject DisableTorchFunctionType = {
static PyTypeObject DisableTorchFunctionType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C.DisableTorchFunction", /* tp_name */
sizeof(DisableTorchFunction), /* tp_basicsize */
@ -304,7 +306,7 @@ static bool is_basic_python_type(PyTypeObject* tp) {
false);
}

inline bool has_torch_function_attr(PyObject* obj) {
inline static bool has_torch_function_attr(PyObject* obj) {
auto attr = PyObject_FastGetAttrString(obj, "__torch_function__");
return (
attr.ptr() != nullptr && attr.ptr() != torch::disabled_torch_function);
@ -321,7 +323,7 @@ auto check_has_torch_function(PyObject* obj, bool ignore_mode) -> bool {
}
} // namespace torch

inline bool sequence_has_torch_function(PyObject* args) {
inline static bool sequence_has_torch_function(PyObject* args) {
Py_ssize_t nargs = PySequence_Fast_GET_SIZE(args);
for (Py_ssize_t i = 0; i < nargs; i++) {
PyObject* obj = PySequence_Fast_GET_ITEM(args, i);
@ -332,7 +334,9 @@ inline bool sequence_has_torch_function(PyObject* args) {
return false;
}

inline bool array_has_torch_function(PyObject* const* args, Py_ssize_t nargs) {
inline static bool array_has_torch_function(
PyObject* const* args,
Py_ssize_t nargs) {
for (Py_ssize_t i = 0; i < nargs; i++) {
if (torch::check_has_torch_function(args[i])) {
return true;

@ -63,7 +63,7 @@ static c10::AliasAnalysisKind parseAliasAnalysisKind(const std::string& k) {
}

template <typename Func>
inline torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
inline static torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
if (key[0] != '\0') {
return torch::dispatch(
c10::parseDispatchKey(key), std::forward<Func>(raw_f));

@ -705,7 +705,7 @@ c10::TensorOptions typeIdWithDefault(

} // namespace

Tensor legacy_tensor_generic_ctor_new(
static Tensor legacy_tensor_generic_ctor_new(
c10::DispatchKey dispatch_key,
at::ScalarType scalar_type,
PyObject* args,
@ -1360,7 +1360,7 @@ void _validate_sparse_compressed_tensor_args(
}

template <c10::Layout required_layout>
void _validate_sparse_compressed_tensor_args_template(
static void _validate_sparse_compressed_tensor_args_template(
c10::DispatchKey dispatch_key,
at::ScalarType scalar_type,
PyObject* args,

@ -507,7 +507,7 @@ at::Tensor tensor_from_cuda_array_interface(PyObject* obj) {

// Mutated only once (during module init); behaves as an immutable variable
// thereafter.
bool numpy_with_dlpack_deleter_bug_installed = false;
static bool numpy_with_dlpack_deleter_bug_installed = false;

// NumPy implemented support for Dlpack capsules in version 1.22.0. However, the
// initial implementation did not correctly handle the invocation of

@ -126,7 +126,7 @@ static PyMethodDef THXPEvent_methods[] = {
{(char*)"synchronize", THXPEvent_synchronize, METH_NOARGS, nullptr},
{nullptr}};

PyTypeObject THXPEventType = {
static PyTypeObject THXPEventType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C._XpuEventBase", /* tp_name */
sizeof(THXPEvent), /* tp_basicsize */

@ -9,6 +9,7 @@
#include <torch/csrc/utils/pycfunction_helpers.h>
#include <torch/csrc/utils/python_numbers.h>
#include <torch/csrc/utils/python_strings.h>
#include <torch/csrc/xpu/Module.h>

#ifndef WIN32
#include <pthread.h>
@ -38,7 +39,7 @@ static void poison_fork() {

// XPU management methods

PyObject* THXPModule_getArchFlags(PyObject* self, PyObject* noargs) {
static PyObject* THXPModule_getArchFlags(PyObject* self, PyObject* noargs) {
HANDLE_TH_ERRORS
#ifdef XPU_ARCH_FLAGS
static const char* flags = C10_STRINGIZE(XPU_ARCH_FLAGS);
@ -55,7 +56,7 @@ static PyObject* THXPModule_isInBadFork_wrap(PyObject* self, PyObject* noargs) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_setDevice_wrap(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_setDevice_wrap(PyObject* self, PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(THPUtils_checkLong(arg), "invalid argument to set_device");

@ -66,7 +67,7 @@ PyObject* THXPModule_setDevice_wrap(PyObject* self, PyObject* arg) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_exchangeDevice_wrap(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_exchangeDevice_wrap(PyObject* self, PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(THPUtils_checkLong(arg), "invalid argument to exchange_device");

@ -82,7 +83,9 @@ PyObject* THXPModule_exchangeDevice_wrap(PyObject* self, PyObject* arg) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_maybeExchangeDevice_wrap(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_maybeExchangeDevice_wrap(
PyObject* self,
PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(
THPUtils_checkLong(arg), "invalid argument to maybe_exchange_device");
@ -99,7 +102,7 @@ PyObject* THXPModule_maybeExchangeDevice_wrap(PyObject* self, PyObject* arg) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_getDevice_wrap(PyObject* self, PyObject* noargs) {
static PyObject* THXPModule_getDevice_wrap(PyObject* self, PyObject* noargs) {
HANDLE_TH_ERRORS

auto device_index = c10::xpu::current_device();
@ -108,14 +111,16 @@ PyObject* THXPModule_getDevice_wrap(PyObject* self, PyObject* noargs) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_getDeviceCount_wrap(PyObject* self, PyObject* noargs) {
static PyObject* THXPModule_getDeviceCount_wrap(
PyObject* self,
PyObject* noargs) {
HANDLE_TH_ERRORS
poison_fork();
return THPUtils_packUInt64(at::xpu::device_count());
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_getCurrentStream_wrap(
static PyObject* THXPModule_getCurrentStream_wrap(
PyObject* self,
PyObject* device_index) {
HANDLE_TH_ERRORS
@ -136,7 +141,7 @@ PyObject* THXPModule_getCurrentStream_wrap(
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_getCurrentStream_raw(
static PyObject* THXPModule_getCurrentStream_raw(
PyObject* self,
PyObject* device_index) {
HANDLE_TH_ERRORS
@ -149,7 +154,7 @@ PyObject* THXPModule_getCurrentStream_raw(
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_setStream_wrap(
static PyObject* THXPModule_setStream_wrap(
PyObject* self,
PyObject* args,
PyObject* kwargs) {
@ -186,7 +191,7 @@ PyObject* THXPModule_setStream_wrap(
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_xpuSynchronize(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_xpuSynchronize(PyObject* self, PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(THPUtils_checkLong(arg), "invalid argument to synchronize");
auto device_index = THPUtils_unpackDeviceIndex(arg);
@ -200,14 +205,14 @@ PyObject* THXPModule_xpuSynchronize(PyObject* self, PyObject* arg) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_emptyCache(PyObject* self, PyObject* noargs) {
static PyObject* THXPModule_emptyCache(PyObject* self, PyObject* noargs) {
HANDLE_TH_ERRORS
c10::xpu::XPUCachingAllocator::emptyCache();
END_HANDLE_TH_ERRORS
Py_RETURN_NONE;
}

PyObject* THXPModule_memoryStats(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_memoryStats(PyObject* self, PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(THPUtils_checkLong(arg), "invalid argument to memory_stats");
const auto device_index = THPUtils_unpackDeviceIndex(arg);
@ -250,7 +255,9 @@ PyObject* THXPModule_memoryStats(PyObject* self, PyObject* arg) {
END_HANDLE_TH_ERRORS
}

PyObject* THXPModule_resetPeakMemoryStats(PyObject* self, PyObject* arg) {
static PyObject* THXPModule_resetPeakMemoryStats(
PyObject* self,
PyObject* arg) {
HANDLE_TH_ERRORS
TORCH_CHECK(
THPUtils_checkLong(arg), "invalid argument to reset_peak_memory_stats");
@ -260,7 +267,7 @@ PyObject* THXPModule_resetPeakMemoryStats(PyObject* self, PyObject* arg) {
Py_RETURN_NONE;
}

PyObject* THXPModule_resetAccumulatedMemoryStats(
static PyObject* THXPModule_resetAccumulatedMemoryStats(
PyObject* self,
PyObject* arg) {
HANDLE_TH_ERRORS

@ -138,7 +138,7 @@ static PyMethodDef THXPStream_methods[] = {
{"__eq__", THXPStream_eq, METH_O, nullptr},
{nullptr}};

PyTypeObject THXPStreamType = {
static PyTypeObject THXPStreamType = {
PyVarObject_HEAD_INIT(nullptr, 0)
"torch._C._XpuStreamBase", /* tp_name */
sizeof(THXPStream), /* tp_basicsize */
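The hunks above reduce to two patterns: symbols used only inside their own translation unit are marked static, while the few symbols that must stay visible to other files keep external linkage under a NOLINTNEXTLINE(misc-use-internal-linkage) suppression. A minimal sketch of both patterns follows, with invented file and symbol names that do not appear anywhere in this diff:

// example.cpp -- illustrative only; file and symbol names are hypothetical.
#include <string>

// Used nowhere outside this .cpp, so give it internal linkage with `static`
// (an unnamed namespace would work equally well); the symbol is no longer
// exported from the object file.
static std::string decorate(const std::string& s) {
  return "[" + s + "]";
}

// A global that really is consumed by other translation units keeps external
// linkage, and the check is silenced explicitly on the next line.
// NOLINTNEXTLINE(misc-use-internal-linkage)
const char* example_tag = "example";

// The exported entry point is the only way other files reach the helper.
std::string make_tagged(const std::string& s) {
  return decorate(s) + example_tag;
}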