diff --git a/.clang-format b/.clang-format
index 0b94540e7a25..2e5161504103 100644
--- a/.clang-format
+++ b/.clang-format
@@ -106,6 +106,8 @@ StatementMacros:
   - C10_DEFINE_int32
   - C10_DEFINE_int64
   - C10_DEFINE_string
+  - C10_DEFINE_REGISTRY_WITHOUT_WARNING
+  - C10_REGISTER_CREATOR
   - DEFINE_BINARY
   - PyObject_HEAD
   - PyObject_VAR_HEAD
diff --git a/CMakeLists.txt b/CMakeLists.txt
index c3ba29a50092..b43137523cfe 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1057,7 +1057,6 @@ if(NOT MSVC)
   append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable"
                                CMAKE_CXX_FLAGS)
-  append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS)

   if(${USE_COLORIZE_OUTPUT})
diff --git a/aten/src/ATen/native/QuantizedLinear.cpp b/aten/src/ATen/native/QuantizedLinear.cpp
index e2f3f06f64c8..6d6c6909fe32 100644
--- a/aten/src/ATen/native/QuantizedLinear.cpp
+++ b/aten/src/ATen/native/QuantizedLinear.cpp
@@ -1,5 +1,4 @@
 #define TORCH_ASSERT_ONLY_METHOD_OPERATORS
-#include

 #include
 #include
@@ -116,7 +115,7 @@ Tensor fbgemm_linear_int8_weight_fp32_activation(
   const Tensor bias_contig = bias.contiguous();

   // Allocate output Tensor and a buffer for fbgemmPacked to use
-  std::vector<int64_t> output_size = input.sizes().vec();
+  auto output_size = input.sizes().vec();
   output_size.back() = N;
   Tensor output = at::empty(output_size, input.options().dtype(at::kFloat), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
   Tensor buffer = at::empty(output_size, input.options().dtype(at::kInt), LEGACY_CONTIGUOUS_MEMORY_FORMAT);
diff --git a/aten/src/ATen/native/RNN.cpp b/aten/src/ATen/native/RNN.cpp
index eee703ce4733..bd503854899c 100644
--- a/aten/src/ATen/native/RNN.cpp
+++ b/aten/src/ATen/native/RNN.cpp
@@ -62,8 +62,6 @@
 #include
 #endif

-int register_linear_params();
-
 namespace at::native {

 namespace {
diff --git a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
index df74b10d70f9..c9c09cf2464f 100644
--- a/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
+++ b/aten/src/ATen/native/quantized/cpu/LinearUnpackImpl.cpp
@@ -19,8 +19,6 @@
 #include
 #endif

-int register_linear_params();
-
 #ifdef USE_FBGEMM
 std::tuple<at::Tensor, std::optional<at::Tensor>> PackedLinearWeight::unpack() {
   auto packB = w.get();
diff --git a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
index fc77d44e18c1..33edbfac1fc4 100644
--- a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
+++ b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.cpp
@@ -28,7 +28,6 @@
 #include
 #endif

-int register_embedding_params();


 #ifdef USE_FBGEMM
@@ -381,9 +380,7 @@ namespace {
   }
 }

-template <int kSpatialDim = 2>
-TORCH_API int
-register_conv_params() {
+template <int kSpatialDim> int register_conv_params() {
   static auto register_conv_params =
       torch::selective_class_<ConvPackedParamsBase<kSpatialDim>>(
           "quantized", TORCH_SELECTIVE_CLASS(_hack_int_to_class_name(kSpatialDim)))
@@ -420,9 +417,7 @@ TORCH_API int register_conv_params<2>();
 template
 TORCH_API int register_conv_params<3>();

-TORCH_API int register_linear_params();
-
-TORCH_API int register_linear_params() {
+int register_linear_params() {
   using SerializationType = std::tuple<at::Tensor, std::optional<at::Tensor>>;
   static auto register_linear_params =
       torch::selective_class_<LinearPackedParamsBase>(
diff --git a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
index 05d63c8476ac..62a21c439b78 100644
--- a/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
+++ b/aten/src/ATen/native/quantized/cpu/fbgemm_utils.h
@@ -6,7 +6,7 @@
 #include
 #include

-#ifdef USE_FBGEMM
+#if defined(USE_FBGEMM) && __has_include(<fbgemm/Fbgemm.h>)
 #include <fbgemm/Fbgemm.h>
 C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Winconsistent-missing-destructor-override")
 #include <fbgemm/FbgemmFP16.h>
@@ -407,3 +407,8 @@ struct TORCH_API PackedEmbeddingBagWeight : public EmbeddingPackedParamsBase {
       bool include_last_offset,
       bool is_embedding_op) override;
 };
+
+TORCH_API int register_linear_params();
+int register_embedding_params();
+
+template <int kSpatialDim = 2> TORCH_API int register_conv_params();
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
index 27139bcb40cb..05be6ebbf8d4 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear.cpp
@@ -31,8 +31,6 @@
 #include
 #include

-int register_linear_params();
-
 #ifdef USE_FBGEMM
 template <bool ReluFused>
 at::Tensor& PackedLinearWeight::apply_impl(
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
index 091e309cd95d..27e1b24d2362 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_dynamic.cpp
@@ -29,8 +29,6 @@
 #include
 #include

-int register_linear_params();
-
 #ifdef USE_FBGEMM
 template <bool ReluFused>
 at::Tensor PackedLinearWeight::apply_dynamic_impl(
diff --git a/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp b/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
index 3e4ce69a8994..d40552037057 100644
--- a/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
+++ b/aten/src/ATen/native/quantized/cpu/qlinear_prepack.cpp
@@ -31,8 +31,6 @@
 #include
 #include

-int register_linear_params();
-
 #ifdef USE_FBGEMM
 namespace {
 // Calculate the column offsets.
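Note: the hunks above all delete per-file copies of the same declarations (`int register_linear_params();` and friends), which fbgemm_utils.h now declares once; the cudnn and library.cpp hunks below switch those consumers over to that header. A minimal single-file sketch of the pattern, with hypothetical standalone definitions (in the patch the header role is played by fbgemm_utils.h and the defining TU by fbgemm_utils.cpp):

    // What the shared header declares, once, for every consumer:
    int register_linear_params();
    template <int kSpatialDim = 2>
    int register_conv_params();

    // What the single defining .cpp provides (bodies here are placeholders;
    // the real ones register LinearPackedParamsBase and
    // ConvPackedParamsBase<kSpatialDim> with the custom-class API):
    int register_linear_params() { return 0; }

    template <int kSpatialDim>
    int register_conv_params() { return kSpatialDim; }

    // Explicit instantiations emit the only definitions other TUs link
    // against, so consumers no longer need their own declarations or
    // `extern template` lines.
    template int register_conv_params<2>();
    template int register_conv_params<3>();

    int main() {
      // Links against the explicit instantiations above.
      return register_linear_params() + register_conv_params<2>() +
                      register_conv_params<3>() == 5
          ? 0
          : 1;
    }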
diff --git a/aten/src/ATen/native/quantized/cudnn/Conv.cpp b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
index d37f674f5c77..dff887118ee5 100644
--- a/aten/src/ATen/native/quantized/cudnn/Conv.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/Conv.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include <ATen/native/quantized/cpu/fbgemm_utils.h>
 #include
 #include
 #include
@@ -22,12 +23,6 @@
 #include
 #include

-template <int kSpatialDim>
-int register_conv_params();
-
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
-
 // TODO: there is a table from input dtype and weight dtype to operator qdtype,
 // we can derive the operator dtype based on input dtype
 cudnn_frontend::ConvDesc_v8 getConvDescriptor(cudnnDataType_t dataType, c10::IntArrayRef padding, c10::IntArrayRef stride, c10::IntArrayRef dilation) {
diff --git a/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp b/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
index 9103bdd0d414..da46dccab316 100644
--- a/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
+++ b/aten/src/ATen/native/quantized/cudnn/ConvPrepack.cpp
@@ -5,6 +5,7 @@
 #include
 #include
+#include <ATen/native/quantized/cpu/fbgemm_utils.h>
 #include
 #include
 #include

@@ -15,12 +16,6 @@
 #include

-template <int kSpatialDim>
-int register_conv_params();
-
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
-
 template <int kSpatialDim>
 c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> PackedConvWeightCudnn<
     kSpatialDim>::
diff --git a/aten/src/ATen/native/quantized/library.cpp b/aten/src/ATen/native/quantized/library.cpp
index 72dcda2b74de..7d2ed9f42e04 100644
--- a/aten/src/ATen/native/quantized/library.cpp
+++ b/aten/src/ATen/native/quantized/library.cpp
@@ -1,13 +1,5 @@
 #include <torch/library.h>
-
-int register_linear_params();
-
-template <int kSpatialDim>
-int register_conv_params();
-
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
-int register_embedding_params();
+#include <ATen/native/quantized/cpu/fbgemm_utils.h>

 TORCH_LIBRARY(quantized, m) {
   m.set_python_module("caffe2.torch.fb.model_transform.splitting.split_dispatcher");
diff --git a/aten/src/ATen/native/quantized/qconv_unpack.cpp b/aten/src/ATen/native/quantized/qconv_unpack.cpp
index f33bd6cf96fe..f613097fdba1 100644
--- a/aten/src/ATen/native/quantized/qconv_unpack.cpp
+++ b/aten/src/ATen/native/quantized/qconv_unpack.cpp
@@ -28,13 +28,6 @@ and /cudnn/ConvUnpackImpl.cpp, for cudnn.
 #include
 #endif

-template <int kSpatialDim>
-int register_conv_params();
-
-extern template int register_conv_params<2>();
-extern template int register_conv_params<3>();
-
-
 namespace at::native {

 namespace {
diff --git a/aten/src/ATen/native/quantized/qlinear_unpack.cpp b/aten/src/ATen/native/quantized/qlinear_unpack.cpp
index a485094df3ca..2999e183b9ef 100644
--- a/aten/src/ATen/native/quantized/qlinear_unpack.cpp
+++ b/aten/src/ATen/native/quantized/qlinear_unpack.cpp
@@ -13,9 +13,6 @@ and /cudnn/linear_unpack_impl.cpp, for cudnn.
 #include
 #include

-int register_linear_params();
-
-
 namespace at::native {

 namespace {
diff --git a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
index 47a9a02ae810..478b73e09938 100644
--- a/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
+++ b/torch/csrc/distributed/c10d/GlooDeviceFactory.cpp
@@ -96,7 +96,7 @@ static std::shared_ptr<::gloo::transport::Device> makeTCPTLSDevice(
       attr, pkey, cert, caFile, caPath);
 }

-C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, TCP_TLS, makeTCPTLSDevice)

 #endif
@@ -120,9 +120,9 @@ static std::shared_ptr<::gloo::transport::Device> makeUVDevice(
 // Registry priority is per key identifier. We register UV to `APPLE` for
 // the flexibility of other application to override by priority. Register
 // UV to `UV` for env "GLOO_DEVICE_TRANSPORT" override.
-C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice);
-C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice);
-C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice);
+C10_REGISTER_CREATOR(GlooDeviceRegistry, APPLE, makeUVDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, WIN32, makeUVDevice)
+C10_REGISTER_CREATOR(GlooDeviceRegistry, UV, makeUVDevice)
 #endif

 namespace {
diff --git a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
index a08aaa4576f6..9801a0327ddf 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_agent.cpp
@@ -153,11 +153,11 @@ void makeStreamsWaitOnOthers(

 C10_DEFINE_REGISTRY_WITHOUT_WARNING(
     TensorPipeTransportRegistry,
-    TransportRegistration);
+    TransportRegistration)

 C10_DEFINE_REGISTRY_WITHOUT_WARNING(
     TensorPipeChannelRegistry,
-    ChannelRegistration);
+    ChannelRegistration)

 const std::string& TensorPipeAgent::guessAddress() {
   static const std::string uvAddress = []() {
@@ -284,7 +284,7 @@ std::unique_ptr<ChannelRegistration> makeMultiplexedUvChannel() {
 C10_REGISTER_CREATOR(
     TensorPipeChannelRegistry,
     mpt_uv,
-    makeMultiplexedUvChannel);
+    makeMultiplexedUvChannel)

 } // namespace
diff --git a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
index 97341a41899c..4c326b6a0e27 100644
--- a/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
+++ b/torch/csrc/distributed/rpc/tensorpipe_cuda.cpp
@@ -24,7 +24,7 @@ std::unique_ptr<ChannelRegistration> makeCudaIpcChannel() {
 }

 // The cuda_ipc channels use cudaMemcpy to transmit CUDA tensor across processes
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_ipc, makeCudaIpcChannel)

 #endif

@@ -44,7 +44,7 @@ std::unique_ptr<ChannelRegistration> makeCudaGdrChannel() {
 // in order to ensure readiness and to agree on the device indices and thus the
 // queue pair to use. It automatically pairs each GPU to the "closest" NIC if
 // there are multiple of them (closest = longest prefix match in PCI tree).
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_gdr, makeCudaGdrChannel)

 #endif

@@ -55,7 +55,7 @@ std::unique_ptr<ChannelRegistration> makeCudaXthChannel() {
 }

 // The cuda_xth channel supports same-process GPU-to-GPU comm
-C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel);
+C10_REGISTER_CREATOR(TensorPipeChannelRegistry, cuda_xth, makeCudaXthChannel)

 std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
   auto context = tensorpipe::channel::cuda_basic::create(
@@ -68,7 +68,7 @@ std::unique_ptr<ChannelRegistration> makeCudaBasicChannel() {
 C10_REGISTER_CREATOR(
     TensorPipeChannelRegistry,
     cuda_basic,
-    makeCudaBasicChannel);
+    makeCudaBasicChannel)

 class TensorpipeCudaConverter : public TensorpipeDeviceTypeConverter {
  public:
diff --git a/torch/csrc/jit/passes/onnx/constant_map.h b/torch/csrc/jit/passes/onnx/constant_map.h
index 60d4470c1b12..7b447fa74231 100644
--- a/torch/csrc/jit/passes/onnx/constant_map.h
+++ b/torch/csrc/jit/passes/onnx/constant_map.h
@@ -2,15 +2,10 @@

 #include

-C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
-C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
 #include
-C10_DIAGNOSTIC_POP()
-C10_DIAGNOSTIC_POP()

 #include
 #include
-#include
 #include

 namespace torch::jit {
diff --git a/torch/csrc/jit/serialization/export.cpp b/torch/csrc/jit/serialization/export.cpp
index 84c0a46a7746..d95c45a7e6b9 100644
--- a/torch/csrc/jit/serialization/export.cpp
+++ b/torch/csrc/jit/serialization/export.cpp
@@ -19,18 +19,14 @@
 #include
 #include
 #include

-#include
-C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wnewline-eof")
 #include
-C10_DIAGNOSTIC_POP()
 #include
 #include
-C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override")
 #include
-C10_DIAGNOSTIC_POP()
 #include
+#include
 #include

 #include
 #include
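Note: the registry hunks drop trailing semicolons because C10_DEFINE_REGISTRY_WITHOUT_WARNING and C10_REGISTER_CREATOR already expand to complete, self-terminated definitions, so a second ';' is an empty declaration that warnings such as clang's -Wextra-semi flag; the .clang-format hunk adds both macros to StatementMacros so clang-format keeps treating the now semicolon-less invocations as whole statements. A minimal sketch of how such a c10 registry is wired up (Widget, WidgetRegistry, and makeFastWidget are illustrative names, not from this patch):

    #include <memory>
    #include <c10/util/Registry.h>

    struct Widget {
      virtual ~Widget() = default;
    };

    // Registry keyed by std::string, producing std::unique_ptr<Widget>.
    // The _WITHOUT_WARNING variant defines the same registry but, as the
    // name suggests, without the registration warning the plain
    // C10_DEFINE_REGISTRY can emit.
    C10_DECLARE_REGISTRY(WidgetRegistry, Widget);
    C10_DEFINE_REGISTRY_WITHOUT_WARNING(WidgetRegistry, Widget)

    namespace {
    struct FastWidget : Widget {};

    std::unique_ptr<Widget> makeFastWidget() {
      return std::make_unique<FastWidget>();
    }
    } // namespace

    // The macro stringifies the key ("fast") and registers the creator at
    // static-initialization time. No trailing ';', matching this patch.
    C10_REGISTER_CREATOR(WidgetRegistry, fast, makeFastWidget)

    int main() {
      // Create() looks up the creator by key; returns nullptr if absent.
      std::unique_ptr<Widget> w = WidgetRegistry()->Create("fast");
      return w == nullptr ? 1 : 0;
    }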