Enable more C++ warnings (#143355)

Fixes #ISSUE_NUMBER
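
In short: this PR turns additional C++ warnings on for non-MSVC builds and fixes what they flag. The `-Wno-missing-braces` suppression is dropped from CMakeLists.txt; trailing semicolons after `C10_DEFINE_REGISTRY_WITHOUT_WARNING` and `C10_REGISTER_CREATOR` call sites are removed (with matching `.clang-format` `StatementMacros` entries so the bare calls still format as statements); the scattered per-file declarations of `register_linear_params()`, `register_conv_params()`, and `register_embedding_params()` move into a new `ATen/native/quantized/library.h`; and warning suppressions around third-party ONNX headers are scoped with `C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED`.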

Pull Request resolved: https://github.com/pytorch/pytorch/pull/143355
Approved by: https://github.com/albanD
Author: cyy
Date: 2024-12-27 05:46:55 +00:00
Committed by: PyTorch MergeBot
Parent: fca457b5db
Commit: 379bbef23c

20 changed files with 39 additions and 76 deletions

View File

@@ -106,6 +106,8 @@ StatementMacros:
- C10_DEFINE_int32
- C10_DEFINE_int64
- C10_DEFINE_string
+- C10_DEFINE_REGISTRY_WITHOUT_WARNING
+- C10_REGISTER_CREATOR
- DEFINE_BINARY
- PyObject_HEAD
- PyObject_VAR_HEAD
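The two new `StatementMacros` entries support a change made throughout this commit: call sites of `C10_DEFINE_REGISTRY_WITHOUT_WARNING` and `C10_REGISTER_CREATOR` lose their trailing semicolons (see the gloo and TensorPipe hunks below). These macros already expand to complete definitions, so a `;` after them is a stray top-level semicolon of the kind `-Wextra-semi`-style warnings report; the `.clang-format` entries tell the formatter that the bare calls are full statements so the lines that follow keep formatting correctly. A minimal sketch of the pattern, using a hypothetical `MY_REGISTER` macro rather than the real c10 ones:

```cpp
#include <cstdio>

int register_thing(const char* name) { return std::printf("%s\n", name); }

// Hypothetical registration macro; like the c10 registry macros after this
// commit, its expansion already ends in ';', so call sites must not add one.
#define MY_REGISTER(name) \
  static int registered_##name = register_thing(#name);

MY_REGISTER(foo)   // fine: the expansion supplies the terminating ';'
MY_REGISTER(bar);  // stray top-level ';' -- exactly what the new warnings flag

int main() { return 0; }
```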

View File

@ -1057,7 +1057,6 @@ if(NOT MSVC)
append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable"
CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS)
if(${USE_COLORIZE_OUTPUT})
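Removing the `-Wno-missing-braces` suppression re-enables the missing-braces warning on Clang/GCC builds. It fires when an aggregate initializer elides the braces of a nested aggregate, with `std::array` as the classic offender. An illustrative snippet (not from the PyTorch tree):

```cpp
#include <array>

// std::array is a struct wrapping a C array, so a fully-braced initializer
// has two levels; eliding the inner braces is what -Wmissing-braces reports.
std::array<int, 3> warns = {1, 2, 3};    // warning: suggest braces around subobject
std::array<int, 3> clean = {{1, 2, 3}};  // fully braced: no warning

int main() { return warns[0] + clean[0] - 2; }
```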

View File

@@ -1,5 +1,4 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
-#include <vector>
#include <ATen/core/Tensor.h>
#include <ATen/Parallel.h>
@@ -116,7 +115,7 @@ Tensor fbgemm_linear_int8_weight_fp32_activation(
const Tensor bias_contig = bias.contiguous();
// Allocate output Tensor and a buffer for fbgemmPacked to use
-std::vector<int64_t> output_size = input.sizes().vec();
+auto output_size = input.sizes().vec();
output_size.back() = N;
Tensor output = at::empty(output_size, input.options().dtype(at::kFloat), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
Tensor buffer = at::empty(output_size, input.options().dtype(at::kInt), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
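The switch to `auto` is type-preserving rather than behavior-changing: `Tensor::sizes()` returns an `IntArrayRef` view, and `ArrayRef<T>::vec()` copies it out as a `std::vector<T>`, so `output_size` remains a mutable `std::vector<int64_t>`. A small standalone check of that assumption against the ATen API:

```cpp
#include <ATen/ATen.h>
#include <type_traits>
#include <vector>

int main() {
  at::Tensor input = at::zeros({4, 7});
  // sizes() is a non-owning view; vec() materializes a copy, so auto
  // deduces exactly std::vector<int64_t>, same as the old spelled-out type.
  auto output_size = input.sizes().vec();
  static_assert(std::is_same_v<decltype(output_size), std::vector<int64_t>>);
  output_size.back() = 3;  // mutate the trailing dim, as the linear op does
  at::Tensor output = at::empty(output_size, input.options());
  return output.size(1) == 3 ? 0 : 1;
}
```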

View File

@@ -7,6 +7,7 @@
#include <ATen/TensorOperators.h>
#include <ATen/mps/MPSDevice.h>
#include <ATen/native/quantized/PackedParams.h>
+#include <ATen/native/quantized/library.h>
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <c10/core/GradMode.h>
@@ -62,8 +63,6 @@
#include <utility>
#endif
-int register_linear_params();
namespace at::native {
namespace {

View File

@@ -19,8 +19,6 @@
#include <ATen/ops/from_blob.h>
#endif
-int register_linear_params();
#ifdef USE_FBGEMM
std::tuple<at::Tensor, std::optional<at::Tensor>> PackedLinearWeight::unpack() {
auto packB = w.get();

View File

@@ -14,6 +14,7 @@
#include <ATen/native/TensorFactories.h>
#include <ATen/quantized/QTensorImpl.h>
#include <ATen/quantized/Quantizer.h>
+#include <aten/src/ATen/native/quantized/library.h>
#include <c10/core/QScheme.h>
#include <c10/core/TensorOptions.h>
#include <c10/util/accumulate.h>
@@ -28,7 +29,6 @@
#include <utility>
#endif
-int register_embedding_params();
#ifdef USE_FBGEMM
@@ -381,9 +381,7 @@ namespace {
}
}
-template <int kSpatialDim = 2>
-TORCH_API int
-register_conv_params() {
+template <int kSpatialDim> int register_conv_params() {
static auto register_conv_params =
torch::selective_class_<ConvPackedParamsBase<kSpatialDim>>(
"quantized", TORCH_SELECTIVE_CLASS(_hack_int_to_class_name(kSpatialDim)))
@@ -420,9 +418,7 @@ TORCH_API int register_conv_params<2>();
template
TORCH_API int register_conv_params<3>();
-TORCH_API int register_linear_params();
-TORCH_API int register_linear_params() {
+int register_linear_params() {
using SerializationType = std::tuple<at::Tensor, std::optional<at::Tensor>>;
static auto register_linear_params =
torch::selective_class_<LinearPackedParamsBase>(
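The reshaped definitions follow from the centralized declarations added in `library.h` (new file below): once the header declares `template <int kSpatialDim = 2> TORCH_API int register_conv_params();`, the definition must not repeat the `= 2`, because C++ allows a default template argument to be specified only once per scope, and `TORCH_API` is likewise supplied by the declaration. A compact illustration of the rule, independent of ATen:

```cpp
// Header-style declaration: the default template argument lives here.
template <int kSpatialDim = 2>
int register_conv_params();

// Definition: repeating "= 2" here would be a compile error (redefinition
// of a default argument); the default from the declaration still applies.
template <int kSpatialDim>
int register_conv_params() {
  return kSpatialDim;
}

int main() {
  return register_conv_params() == 2 ? 0 : 1;  // default argument kicks in
}
```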

View File

@@ -4,12 +4,13 @@
#include <ATen/Parallel.h>
#include <ATen/TensorOperators.h>
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
-#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <ATen/native/quantized/cpu/XnnpackUtils.h>
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <ATen/native/quantized/cpu/qlinear.h>
+#include <ATen/native/quantized/library.h>
+#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <torch/library.h>
@@ -31,8 +32,6 @@
#include <algorithm>
#include <string>
-int register_linear_params();
#ifdef USE_FBGEMM
template <bool ReluFused>
at::Tensor& PackedLinearWeight::apply_impl(

View File

@@ -3,10 +3,11 @@
#include <ATen/Context.h>
#include <ATen/Parallel.h>
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
-#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
+#include <ATen/native/quantized/library.h>
+#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <caffe2/utils/threadpool/pthreadpool-cpp.h>
#include <torch/library.h>
@@ -29,8 +30,6 @@
#include <string>
#include <type_traits>
-int register_linear_params();
#ifdef USE_FBGEMM
template <bool ReluFused>
at::Tensor PackedLinearWeight::apply_dynamic_impl(

View File

@@ -4,10 +4,11 @@
#include <ATen/Context.h>
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
#include <ATen/native/quantized/cpu/init_qnnpack.h>
-#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
+#include <ATen/native/quantized/library.h>
+#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/quantized/Quantizer.h>
#include <torch/custom_class.h>
@@ -31,8 +32,6 @@
#include <utility>
#include <vector>
-int register_linear_params();
#ifdef USE_FBGEMM
namespace {
// Calculate the column offsets.

View File

@@ -10,6 +10,7 @@
#include <ATen/cudnn/Handle.h>
#include <ATen/native/cudnn/ConvShared.h>
#include <ATen/native/quantized/cudnn/utils.h>
+#include <ATen/native/quantized/library.h>
#include <ATen/native/quantized/ConvUtils.h>
#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/utils/ParamsHash.h>
@@ -22,12 +23,6 @@
#include <unordered_map>
#include <vector>
-template <int kSpatialDim = 2>
-int register_conv_params();
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
// TODO: there is a table from input dtype and weight dtype to operator qdtype,
// we can derive the operator dtype based on input dtype
cudnn_frontend::ConvDesc_v8 getConvDescriptor(cudnnDataType_t dataType, c10::IntArrayRef padding, c10::IntArrayRef stride, c10::IntArrayRef dilation) {

View File

@@ -7,6 +7,7 @@
#include <torch/library.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <ATen/native/quantized/cudnn/utils.h>
+#include <ATen/native/quantized/library.h>
#include <ATen/native/quantized/PackedParams.h>
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
@@ -15,12 +16,6 @@
#include <utility>
-template <int kSpatialDim = 2>
-int register_conv_params();
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
template <int kSpatialDim>
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightCudnn<
kSpatialDim>::

View File

@@ -1,13 +1,5 @@
#include <torch/library.h>
-int register_linear_params();
-template <int kSpatialDim = 2>
-int register_conv_params();
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
-int register_embedding_params();
+#include <aten/src/ATen/native/quantized/library.h>
TORCH_LIBRARY(quantized, m) {
m.set_python_module("caffe2.torch.fb.model_transform.splitting.split_dispatcher");

View File

@@ -0,0 +1,8 @@
+#pragma once
+#include <c10/macros/Export.h>
+TORCH_API int register_linear_params();
+int register_embedding_params();
+template <int kSpatialDim = 2> TORCH_API int register_conv_params();
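This new header is the core of the cleanup: each quantized-ops `.cpp` previously re-declared `register_linear_params()` and friends at file scope, and those ad-hoc declarations could drift from the definitions (note the inconsistent `TORCH_API` spellings removed above). Funneling every user through one header turns any mismatch into a compile error. The shape of the pattern, sketched with hypothetical file names:

```cpp
// --- registry.h (the library.h role): one authoritative declaration
int register_linear_params();

// --- user.cpp: includes registry.h instead of re-declaring the function,
// so every translation unit sees the identical signature and attributes
static int force_registration = register_linear_params();

// --- registry.cpp: the definition includes the same header, so signature
// or attribute drift is diagnosed at compile time instead of at link time
int register_linear_params() { return 42; }

int main() { return force_registration == 42 ? 0 : 1; }
```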

View File

@@ -19,6 +19,7 @@ and /cudnn/ConvUnpackImpl.cpp, for cudnn.
#include <ATen/native/quantized/cpu/OnednnUtils.h>
#include <ATen/native/quantized/cpu/QuantUtils.h>
#include <ATen/native/quantized/PackedParams.h>
+#include <ATen/native/quantized/library.h>
#ifndef AT_PER_OPERATOR_HEADERS
#include <ATen/Functions.h>
@@ -28,13 +29,6 @@ and /cudnn/ConvUnpackImpl.cpp, for cudnn.
#include <ATen/ops/from_blob.h>
#endif
-template <int kSpatialDim = 2>
-int register_conv_params();
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
namespace at::native {
namespace {

View File

@@ -8,14 +8,12 @@ and /cudnn/linear_unpack_impl.cpp, for cudnn.
*/
#include <ATen/ATen.h>
#include <ATen/native/quantized/cpu/fbgemm_utils.h>
-#include <ATen/native/quantized/PackedParams.h>
#include <ATen/native/quantized/cpu/QnnpackUtils.h>
+#include <ATen/native/quantized/library.h>
+#include <ATen/native/quantized/PackedParams.h>
#include <torch/custom_class.h>
#include <torch/library.h>
-int register_linear_params();
namespace at::native {
namespace {

View File

@@ -96,7 +96,7 @@ static std::shared_ptr<::gloo::transport::Device> makeTCPTLSDevice(
      attr, pkey, cert, caFile, caPath);
}
-C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice)
#endif
#if GLOO_HAVE_TRANSPORT_UV
@@ -120,9 +120,9 @@ static std::shared_ptr<::gloo::transport::Device> makeUVDevice(
// Registry priority is per key identifier. We register UV to `APPLE` for
// the flexibility of other application to override by priority. Register
// UV to `UV` for env "GLOO_DEVICE_TRANSPORT" override.
-C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice);
-C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice);
-C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice)
#endif
namespace {

View File

@@ -153,11 +153,11 @@ void makeStreamsWaitOnOthers(
C10_DEFINE_REGISTRY_WITHOUT_WARNING(
    TensorPipeTransportRegistry,
-    TransportRegistration);
+    TransportRegistration)
C10_DEFINE_REGISTRY_WITHOUT_WARNING(
    TensorPipeChannelRegistry,
-    ChannelRegistration);
+    ChannelRegistration)
const std::string& TensorPipeAgent::guessAddress() {
static const std::string uvAddress = []() {
@@ -284,7 +284,7 @@ std::unique_ptr<ChannelRegistration> makeMultiplexedUvChannel() {
C10_REGISTER_CREATOR(
    TensorPipeChannelRegistry,
    mpt_uv,
-    makeMultiplexedUvChannel);
+    makeMultiplexedUvChannel)
} // namespace

View File

@@ -24,7 +24,7 @@ std::unique_ptr<ChannelRegistration> makeCudaIpcChannel() {
}
// The cuda_ipc channels use cudaMemcpy to transmit CUDA tensor across processes
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel)
#endif
@@ -44,7 +44,7 @@ std::unique_ptr<ChannelRegistration> makeCudaGdrChannel() {
// in order to ensure readiness and to agree on the device indices and thus the
// queue pair to use. It automatically pairs each GPU to the "closest" NIC if
// there are multiple of them (closest = longest prefix match in PCI tree).
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel)
#endif
@@ -55,7 +55,7 @@ std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
}
// The cuda_xth channel supports same-process GPU-to-GPU comm
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel)
std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
auto context = tensorpipe::channel::cuda_basic::create(
@@ -68,7 +68,7 @@ std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
C10_REGISTER_CREATOR(
    TensorPipeChannelRegistry,
    cuda_basic,
-    makeCudaBasicChannel);
+    makeCudaBasicChannel)
class TensorpipeCudaConverter : public TensorpipeDeviceTypeConverter {
public:

View File

@@ -2,15 +2,10 @@
#include <c10/macros/Macros.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
#include <onnx/shape_inference/implementation.h>
C10_DIAGNOSTIC_POP()
C10_DIAGNOSTIC_POP()
#include <torch/csrc/jit/ir/ir.h>
#include <torch/csrc/jit/serialization/export.h>
#include <mutex>
#include <unordered_map>
namespace torch::jit {
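Both ONNX hunks (this file and the next) rely on the same device: `C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED` scopes a suppression to just the third-party `#include`, so the newly enabled warnings stay active for PyTorch's own code. Conceptually it expands to the familiar pragma sequence, gated on whether the compiler actually knows the flag; the raw Clang form looks roughly like:

```cpp
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wnewline-eof"
#include <onnx/shape_inference/implementation.h>  // header that trips the warning
#pragma clang diagnostic pop
// Everything below compiles with -Wnewline-eof fully active again.
```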

View File

@@ -19,18 +19,14 @@
#include <torch/csrc/onnx/back_compat.h>
#include <torch/csrc/onnx/onnx.h>
#include <torch/version.h>
#include <optional>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
#include <onnx/checker.h>
C10_DIAGNOSTIC_POP()
#include <onnx/onnx_pb.h>
#include <onnx/proto_utils.h>
C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
#include <onnx/shape_inference/implementation.h>
C10_DIAGNOSTIC_POP()
#include <memory>
#include <optional>
#include <regex>
#include <set>
#include <sstream>