diff --git a/.clang-tidy b/.clang-tidy
index 71ffdf8cb224..3c96106fe53d 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -66,7 +66,6 @@ readability-simplify-subscript-expr,
 readability-string-compare,
 -readability-redundant-access-specifiers,
 -readability-redundant-control-flow,
--readability-redundant-inline-specifier,
 '
 HeaderFilterRegex: '^(aten/|c10/|torch/).*$'
 WarningsAsErrors: '*'
diff --git a/aten/src/ATen/TensorIndexing.h b/aten/src/ATen/TensorIndexing.h
index 7785ca861f0e..a487589833e8 100644
--- a/aten/src/ATen/TensorIndexing.h
+++ b/aten/src/ATen/TensorIndexing.h
@@ -214,7 +214,7 @@ inline Tensor applySlice(
       "step must be greater than zero");
   // See NOTE [nested tensor size for indexing]
-  if (self_sizes.has_value() && !self_sizes.value().empty()) {
+  if (self_sizes.has_value() && self_sizes.value().size() > 0) {
     // Skip this optimization if we are tracing, as the trace may be polymorphic
     // over the shape of the `self` tensor, and we still want to record
     // the slice.
diff --git a/aten/src/ATen/core/TransformationHelper.h b/aten/src/ATen/core/TransformationHelper.h
index dad18bd019bb..f81018a8e674 100644
--- a/aten/src/ATen/core/TransformationHelper.h
+++ b/aten/src/ATen/core/TransformationHelper.h
@@ -117,7 +117,7 @@ C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) {
 template <>
 C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) {
   // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function
-  return median + sigma * at::tan(c10::pi<double> * (val - 0.5));
+  return median + sigma * at::tan(c10::pi<double> * (val - static_cast<double>(0.5)));
 }

 /**
diff --git a/aten/src/ATen/core/boxing/BoxedKernel_impl.h b/aten/src/ATen/core/boxing/BoxedKernel_impl.h
index 331141bcc8c9..1960607c6bc8 100644
--- a/aten/src/ATen/core/boxing/BoxedKernel_impl.h
+++ b/aten/src/ATen/core/boxing/BoxedKernel_impl.h
@@ -2,7 +2,7 @@

 namespace c10 {

-inline BoxedKernel::BoxedKernel() : boxed_kernel_func_(nullptr) {}
+inline BoxedKernel::BoxedKernel() : functor_(), boxed_kernel_func_(nullptr) {}

 inline BoxedKernel::BoxedKernel(
     std::unique_ptr<OperatorKernel> functor,
diff --git a/aten/src/ATen/core/boxing/KernelFunction_impl.h b/aten/src/ATen/core/boxing/KernelFunction_impl.h
index bb981c1d4efd..672309ec19a2 100644
--- a/aten/src/ATen/core/boxing/KernelFunction_impl.h
+++ b/aten/src/ATen/core/boxing/KernelFunction_impl.h
@@ -20,7 +20,9 @@ make_unique_base(Args&&... args) {
 }
 } // namespace detail

 inline KernelFunction::KernelFunction()
-    : unboxed_kernel_func_(nullptr), sym_unboxed_kernel_func_(nullptr) {}
+    : boxed_kernel_func_(),
+      unboxed_kernel_func_(nullptr),
+      sym_unboxed_kernel_func_(nullptr) {}

 inline KernelFunction::~KernelFunction() {
   if (tokens_) {
diff --git a/aten/src/ATen/core/dispatch/Dispatcher.h b/aten/src/ATen/core/dispatch/Dispatcher.h
index 3a65b288bf8e..43eb0028c70f 100644
--- a/aten/src/ATen/core/dispatch/Dispatcher.h
+++ b/aten/src/ATen/core/dispatch/Dispatcher.h
@@ -96,7 +96,7 @@ class TORCH_API Dispatcher final {
   friend class TypedOperatorHandle;

   struct Guard final {
-    Guard() : alive(true) {}
+    Guard() : alive(true), mutex() {}
     std::atomic<bool> alive;
     std::mutex mutex;
   };
diff --git a/aten/src/ATen/core/op_registration/op_allowlist.h b/aten/src/ATen/core/op_registration/op_allowlist.h
index 1f39ba4e3871..3e8e03f9fa4c 100644
--- a/aten/src/ATen/core/op_registration/op_allowlist.h
+++ b/aten/src/ATen/core/op_registration/op_allowlist.h
@@ -114,7 +114,7 @@ constexpr bool allowlist_contains(std::string_view allowlist, std::string_view i
       }
       next++;
     } else {
-      if (allowlist.substr(cur) == item) {
+      if (allowlist.substr(cur).compare(item) == 0) {
        return true;
      }
      break;
diff --git a/aten/src/ATen/core/op_registration/op_registration.h b/aten/src/ATen/core/op_registration/op_registration.h
index d441269bf297..7a44cfa49b07 100644
--- a/aten/src/ATen/core/op_registration/op_registration.h
+++ b/aten/src/ATen/core/op_registration/op_registration.h
@@ -411,6 +411,7 @@ public:
     Options()
     : schemaOrName_(std::nullopt)
+    , kernels()
     , aliasAnalysisKind_(std::nullopt)
     {}
@@ -419,6 +420,7 @@ public:
     struct KernelRegistrationConfig final {
       KernelRegistrationConfig()
         : dispatch_key(std::nullopt)
+        , func()
         , cpp_signature(std::nullopt)
         , inferred_function_schema(nullptr)
       {}
diff --git a/aten/src/ATen/cuda/CUDAGeneratorImpl.h b/aten/src/ATen/cuda/CUDAGeneratorImpl.h
index d4ab49382e7f..d31fde56ebc1 100644
--- a/aten/src/ATen/cuda/CUDAGeneratorImpl.h
+++ b/aten/src/ATen/cuda/CUDAGeneratorImpl.h
@@ -99,8 +99,8 @@ struct CUDAGeneratorState : public c10::intrusive_ptr_target {
   uint64_t offset_intragraph_;
   bool capturing_{};
   std::unordered_set<cuda::CUDAGraph*> registered_graphs_;
-  at::TensorBase seed_extragraph_;
-  at::TensorBase offset_extragraph_;
+  at::TensorBase seed_extragraph_{};
+  at::TensorBase offset_extragraph_{};

   CUDAGeneratorState(
       uint64_t seed = default_rng_seed_val,
@@ -167,7 +167,7 @@ struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl {
   CUDAGeneratorImpl* clone_impl() const override;

   c10::intrusive_ptr<CUDAGeneratorState> state_;
-  std::atomic_flag no_reset_rnn_state_;
+  std::atomic_flag no_reset_rnn_state_{};
 };

 namespace cuda::detail {
diff --git a/aten/src/ATen/cuda/detail/DeviceThreadHandles.h b/aten/src/ATen/cuda/detail/DeviceThreadHandles.h
index 71a344d281d2..1f80c863b639 100644
--- a/aten/src/ATen/cuda/detail/DeviceThreadHandles.h
+++ b/aten/src/ATen/cuda/detail/DeviceThreadHandles.h
@@ -122,7 +122,7 @@ struct DeviceThreadHandlePool : public std::enable_shared_from_this
-      if (!my_handles.empty()) {
+      if (my_handles.size() > 0) {
         auto parent = weak_parent.lock();
         if (!parent) {
           // If this thread exits after atexit handlers have completed, the
diff --git a/aten/src/ATen/native/RangeUtils.h b/aten/src/ATen/native/RangeUtils.h
index fd62b8e01329..dcab86ca9a42 100644
--- a/aten/src/ATen/native/RangeUtils.h
+++ b/aten/src/ATen/native/RangeUtils.h
@@ -47,7 +47,7 @@ int64_t compute_arange_size(const Scalar& start, const Scalar& end, const Scalar
     int64_t sgn = (xstep > 0) - (xstep < 0);
     size_d = std::ceil((xend - xstart + xstep - sgn) / xstep);
   } else {
-    size_d = std::ceil((end.to<double>() - start.to<double>())
+    size_d = std::ceil(static_cast<double>(end.to<double>() - start.to<double>())
        / step.to<double>());
   }
diff --git a/aten/src/ATen/native/UpSample.h b/aten/src/ATen/native/UpSample.h
index d37ccbe2763d..5b49fdd02954 100644
--- a/aten/src/ATen/native/UpSample.h
+++ b/aten/src/ATen/native/UpSample.h
@@ -4,6 +4,7 @@
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/aten/src/ATen/native/cpu/AtomicAddFloat.h b/aten/src/ATen/native/cpu/AtomicAddFloat.h
index 526f86d705b7..5b24ee4821c4 100644
--- a/aten/src/ATen/native/cpu/AtomicAddFloat.h
+++ b/aten/src/ATen/native/cpu/AtomicAddFloat.h
@@ -22,7 +22,7 @@ static inline void cpu_atomic_add_float(float* dst, float fvalue)
   old_value.floatV = *dst;
   new_value.floatV = old_value.floatV + fvalue;

-  unsigned* old_intV = &old_value.intV;
+  unsigned* old_intV = (unsigned*)(&old_value.intV);
   while (!std::atomic_compare_exchange_strong(dst_intV, old_intV, new_value.intV)) {
 #ifdef __aarch64__
     __asm__ __volatile__("yield;" : : : "memory");
diff --git a/aten/src/ATen/native/cpu/BlasKernel.cpp b/aten/src/ATen/native/cpu/BlasKernel.cpp
index 5256b964ec49..ab3b16c395a3 100644
--- a/aten/src/ATen/native/cpu/BlasKernel.cpp
+++ b/aten/src/ATen/native/cpu/BlasKernel.cpp
@@ -118,7 +118,7 @@ gemm_notrans_(
   scale_(m, n, beta, c, ldc);

   // c += alpha * (a @ b)
-  const uint64_t unsigned_m = m;
+  const uint64_t unsigned_m = static_cast<uint64_t>(m);
   const uint64_t i_m = unsigned_m / 4;
   for (const uint64_t l : c10::irange(k)) {
     for (const uint64_t j : c10::irange(n)) {
diff --git a/aten/src/ATen/native/cpu/ReduceUtils.h b/aten/src/ATen/native/cpu/ReduceUtils.h
index 1b0be8d18db7..fd7c4a2750a6 100644
--- a/aten/src/ATen/native/cpu/ReduceUtils.h
+++ b/aten/src/ATen/native/cpu/ReduceUtils.h
@@ -8,6 +8,7 @@
 #include
 #include
 #include
+#include

 namespace at::native {
 inline namespace CPU_CAPABILITY {
diff --git a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
index 5c677f648ca6..dac0f3bef25e 100644
--- a/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
+++ b/aten/src/ATen/native/cpu/SoftMaxKernel.cpp
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include

 // [Note AVX-SSE transitions] In general we avoid calls into cmath for code
 // compiled with AVX/AVX2 This is because of SSE-AVX transitions and a bug in
diff --git a/aten/src/ATen/native/cpu/Unfold2d.cpp b/aten/src/ATen/native/cpu/Unfold2d.cpp
index 06958fce1754..8c94decfff02 100644
--- a/aten/src/ATen/native/cpu/Unfold2d.cpp
+++ b/aten/src/ATen/native/cpu/Unfold2d.cpp
@@ -240,7 +240,7 @@ static void unfolded2d_copy(
     int64_t output_height,
     int64_t output_width) {
   at::parallel_for(
-      0, n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) {
+      0, (int64_t)n_input_plane * kH * kW, 0, [&](int64_t start, int64_t end) {
         for (const auto k : c10::irange(start, end)) {
           int64_t nip = k / (kH * kW);
           int64_t rest = k % (kH * kW);
@@ -316,7 +316,7 @@ static void unfolded2d_copy(
           for (int64_t x = 0; x < output_width; x++)
             memcpy(
                 dst + (size_t)y * output_width + x,
-                src + (size_t)iy * input_width + ix + x * dW,
+                src + (size_t)iy * input_width + ix + (int64_t)x * dW,
                 sizeof(scalar_t) * (1));
         }
       }
diff --git a/aten/src/ATen/native/cpu/int4mm_kernel.cpp b/aten/src/ATen/native/cpu/int4mm_kernel.cpp
index 7e0e732d9c83..c8e0b8e86793 100644
--- a/aten/src/ATen/native/cpu/int4mm_kernel.cpp
+++ b/aten/src/ATen/native/cpu/int4mm_kernel.cpp
@@ -906,7 +906,7 @@ static void ref_dyn_quant_matmul_4bit_channelwise_kernel(
       // Round to nearest integer
       const int32_t nudged_zero_point0 = lrintf(zero_point0);

-      int8_t* dst_ptr = lhs_qa8dx + m_idx * dst_stride;
+      int8_t* dst_ptr = (int8_t*)lhs_qa8dx + m_idx * dst_stride;

       // LHS offset at the beginning of the row
       *((float*)(dst_ptr)) = recip_scale0;
@@ -1048,7 +1048,7 @@ static void ref_dyn_quant_matmul_4bit_groupwise_kernel(
     zero_point0 = (std::min)(zero_point0, qmax);
     const int32_t nudged_zero_point0 = lrintf(zero_point0);

-    int8_t* dst_ptr = lhs_qa8dx + row_idx * dst_stride;
+    int8_t* dst_ptr = (int8_t*)lhs_qa8dx + row_idx * dst_stride;

     *((float*)(dst_ptr)) = recip_scale0;
     dst_ptr += sizeof(float);
diff --git a/aten/src/ATen/native/quantized/cpu/QuantUtils.h b/aten/src/ATen/native/quantized/cpu/QuantUtils.h
index 686bbf4f8317..e81b0d87916b 100644
--- a/aten/src/ATen/native/quantized/cpu/QuantUtils.h
+++ b/aten/src/ATen/native/quantized/cpu/QuantUtils.h
@@ -146,12 +146,12 @@ inline TensorQuantizationParams ChooseQuantizationParams(
   // The arithmetic error on the zero point computed from either pair
   // will be roughly machine_epsilon * (sum of absolute values of terms)
   // so we want to use the variant that adds the smaller terms.
-  double zero_point_from_min = qmin - min / scale;
-  double zero_point_from_max = qmax - max / scale;
+  double zero_point_from_min = qmin - min / static_cast<double>(scale);
+  double zero_point_from_max = qmax - max / static_cast<double>(scale);
   double zero_point_from_min_error =
-      std::abs(qmin) - std::abs(min / scale);
+      std::abs(qmin) - std::abs(min / static_cast<double>(scale));
   double zero_point_from_max_error =
-      std::abs(qmax) - std::abs(max / scale);
+      std::abs(qmax) - std::abs(max / static_cast<double>(scale));
   double initial_zero_point =
       zero_point_from_min_error < zero_point_from_max_error
       ? zero_point_from_min
diff --git a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
index 89bb033a6b03..b5b887b98bb0 100644
--- a/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
+++ b/aten/src/ATen/native/quantized/cpu/kernels/QuantizedOpKernels.cpp
@@ -560,7 +560,7 @@ float hsum_sq(const int32_t* A, int len) {
   alignas(64) float temp[8];
   _mm256_store_ps(temp, sum_ps);
   for (const auto k : c10::irange(8)) {
-    row_sum += temp[k];
+    row_sum += static_cast<float>(temp[k]);
   }
 #elif defined(CPU_CAPABILITY_AVX512)
   __m512 sum_ps = _mm512_setzero_ps();
@@ -574,7 +574,7 @@ float hsum_sq(const int32_t* A, int len) {
   alignas(64) float temp[16];
   _mm512_store_ps(temp, sum_ps);
   for (const auto k : c10::irange(16)) {
-    row_sum += temp[k];
+    row_sum += static_cast<float>(temp[k]);
   }
 #endif // CPU_CAPABILITY_AVX2 or CPU_CAPABILITY_AVX512
@@ -1282,7 +1282,7 @@ template
 void qadd_scalar_kernel(Tensor& out, const Tensor& self, const Scalar& other) {
   int64_t zero_point = out.q_zero_point();
   float scale = static_cast<float>(out.q_scale());
-  float inv_scale = 1.0f / scale;
+  float inv_scale = static_cast<float>(1.0f / scale);
   int64_t self_zero_point = self.q_zero_point();
   float self_scale = static_cast<float>(self.q_scale());
@@ -2915,7 +2915,7 @@ void fake_quantize_learnable_channel_grad_kernel_cpu(
         // NOLINTNEXTLINE(bugprone-narrowing-conversions,cppcoreguidelines-narrowing-conversions)
         *dx_output = (*dy_input) * (xqi >= quant_min && xqi <= quant_max);
         // Calculate gradients for scale and zero point.
-        float xfqi = ((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input));
+        float xfqi = static_cast<float>((std::max(std::min(xqi, quant_max), quant_min) - (*zero_point_input)) * (*scale_input));
         if (xqi < quant_min || xqi > quant_max) {
           *dzero_point_output = (*dy_input) * (-1) * (*scale_input) * grad_factor;
           *dscale_output = ((xqi < quant_min) ? ((*dy_input) * dscale_small) : ((*dy_input) * dscale_big)) * grad_factor;
@@ -4415,7 +4415,7 @@ void _qmul_tensor_cpu_impl(
     uint8_t y_data = *(y_ptr + idx);
     int32_t x_val = static_cast<int32_t>(x_data) - x_zero_point;
     int32_t y_val = static_cast<int32_t>(y_data) - y_zero_point;
-    int32_t out_val = x_val * y_val;
+    int32_t out_val = static_cast<int32_t>(x_val * y_val);
     float out_val_f = (float)out_val * multiplier;
     if constexpr (std::is_same<T, float>::value) {
       *(out_ptr + idx) = out_val_f;
diff --git a/aten/src/ATen/ops/from_blob.h b/aten/src/ATen/ops/from_blob.h
index 63b15ef5ca1b..a209380abb64 100644
--- a/aten/src/ATen/ops/from_blob.h
+++ b/aten/src/ATen/ops/from_blob.h
@@ -90,12 +90,12 @@ class TORCH_API TensorMaker {

   void* data_;
   IntArrayRef sizes_;
-  OptionalIntArrayRef strides_;
-  std::optional<int64_t> storage_offset_;
-  std::function<void(void*)> deleter_;
+  OptionalIntArrayRef strides_{};
+  std::optional<int64_t> storage_offset_{};
+  std::function<void(void*)> deleter_{};
   std::unique_ptr ctx_{nullptr, detail::noopDelete};
-  std::optional<Device> device_;
-  TensorOptions opts_;
+  std::optional<Device> device_{};
+  TensorOptions opts_{};
   bool resizeable_{};
   c10::Allocator* allocator_{};
 };
diff --git a/c10/core/DeviceGuard.h b/c10/core/DeviceGuard.h
index 682c58a0a155..7fa366049480 100644
--- a/c10/core/DeviceGuard.h
+++ b/c10/core/DeviceGuard.h
@@ -182,7 +182,7 @@ class OptionalDeviceGuard {
   }

  private:
-  impl::InlineOptionalDeviceGuard<impl::VirtualGuardImpl> guard_;
+  impl::InlineOptionalDeviceGuard<impl::VirtualGuardImpl> guard_{};
 };

 // Note [Whither the DeviceGuard boilerplate]
diff --git a/c10/core/StreamGuard.h b/c10/core/StreamGuard.h
index c901a8a768f1..d3057823a5cd 100644
--- a/c10/core/StreamGuard.h
+++ b/c10/core/StreamGuard.h
@@ -143,7 +143,7 @@ struct OptionalStreamGuard {
   }

  private:
-  c10::impl::InlineOptionalStreamGuard<impl::VirtualGuardImpl> guard_;
+  c10::impl::InlineOptionalStreamGuard<impl::VirtualGuardImpl> guard_{};
 };

 /**
diff --git a/c10/core/impl/PyInterpreterHooks.h b/c10/core/impl/PyInterpreterHooks.h
index d68551ac6e15..4fe025d2e778 100644
--- a/c10/core/impl/PyInterpreterHooks.h
+++ b/c10/core/impl/PyInterpreterHooks.h
@@ -33,7 +33,6 @@ C10_DECLARE_REGISTRY(
 // Get the global PyInterpreter hooks instance
 C10_API const PyInterpreterHooksInterface& getPyInterpreterHooks();

-// Helper function to get the global interpreter
 C10_API PyInterpreter* getGlobalPyInterpreter();

 } // namespace c10::impl
diff --git a/c10/core/impl/PyObjectSlot.h b/c10/core/impl/PyObjectSlot.h
index 2e1425c06779..e7d78f8360c3 100644
--- a/c10/core/impl/PyObjectSlot.h
+++ b/c10/core/impl/PyObjectSlot.h
@@ -14,6 +14,9 @@ using GetPyInterpreterFn = PyInterpreter* (*)();
 // Global function pointer (set by csrc initialization)
 C10_API extern GetPyInterpreterFn g_get_pyinterpreter_fn;

+// Helper function to get the global interpreter
+C10_API PyInterpreter* getGlobalPyInterpreter();
+
 struct C10_API PyObjectSlot {
  public:
   PyObjectSlot();
diff --git a/c10/cuda/CUDACachingAllocator.h b/c10/cuda/CUDACachingAllocator.h
index 84acfd78209a..7bd53d8a70b7 100644
--- a/c10/cuda/CUDACachingAllocator.h
+++ b/c10/cuda/CUDACachingAllocator.h
@@ -137,7 +137,7 @@ struct TraceEntry {
   size_t size_;
   MempoolId_t mempool_;
   trace_time_ time_{};
-  std::string compile_context_;
+  std::string compile_context_{};
 };

 // Calls made by record_function will save annotations
diff --git a/caffe2/utils/threadpool/WorkersPool.h b/caffe2/utils/threadpool/WorkersPool.h
index 274456ffc532..5de6b1213e84 100644
--- a/caffe2/utils/threadpool/WorkersPool.h
+++ b/caffe2/utils/threadpool/WorkersPool.h
@@ -39,7 +39,7 @@ struct AllocAligned {
 #elif defined(_MSC_VER)
     p = _aligned_malloc(sizeof(T), kGEMMLOWPCacheLineSize);
 #else
-    auto res = posix_memalign(&p, kGEMMLOWPCacheLineSize, sizeof(T));
+    auto res = posix_memalign((void**)&p, kGEMMLOWPCacheLineSize, sizeof(T));
     (void)res;
 #endif
diff --git a/torch/csrc/api/include/torch/nn/options/activation.h b/torch/csrc/api/include/torch/nn/options/activation.h
index 00c7a99e6751..480e09ad4de2 100644
--- a/torch/csrc/api/include/torch/nn/options/activation.h
+++ b/torch/csrc/api/include/torch/nn/options/activation.h
@@ -686,23 +686,23 @@ struct TORCH_API MultiheadAttentionForwardFuncOptions {

   TORCH_ARG(bool, training) = true;

-  TORCH_ARG(Tensor, key_padding_mask);
+  TORCH_ARG(Tensor, key_padding_mask) = {};

   TORCH_ARG(bool, need_weights) = true;

-  TORCH_ARG(Tensor, attn_mask);
+  TORCH_ARG(Tensor, attn_mask) = {};

   TORCH_ARG(bool, use_separate_proj_weight) = false;

-  TORCH_ARG(Tensor, q_proj_weight);
+  TORCH_ARG(Tensor, q_proj_weight) = {};

-  TORCH_ARG(Tensor, k_proj_weight);
+  TORCH_ARG(Tensor, k_proj_weight) = {};

-  TORCH_ARG(Tensor, v_proj_weight);
+  TORCH_ARG(Tensor, v_proj_weight) = {};

-  TORCH_ARG(Tensor, static_k);
+  TORCH_ARG(Tensor, static_k) = {};

-  TORCH_ARG(Tensor, static_v);
+  TORCH_ARG(Tensor, static_v) = {};

   TORCH_ARG(bool, average_attn_weights) = true;
 };
diff --git a/torch/csrc/api/include/torch/nn/options/batchnorm.h b/torch/csrc/api/include/torch/nn/options/batchnorm.h
index 78a287207c3a..d77cfb4f0d15 100644
--- a/torch/csrc/api/include/torch/nn/options/batchnorm.h
+++ b/torch/csrc/api/include/torch/nn/options/batchnorm.h
@@ -73,9 +73,9 @@ namespace functional {
 /// F::BatchNormFuncOptions().weight(weight).bias(bias).momentum(0.1).eps(1e-05).training(false));
 /// ```
 struct TORCH_API BatchNormFuncOptions {
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = Tensor();

-  TORCH_ARG(Tensor, bias);
+  TORCH_ARG(Tensor, bias) = Tensor();

   TORCH_ARG(bool, training) = false;
diff --git a/torch/csrc/api/include/torch/nn/options/conv.h b/torch/csrc/api/include/torch/nn/options/conv.h
index bbaecbeb97b6..f10d5e9a3106 100644
--- a/torch/csrc/api/include/torch/nn/options/conv.h
+++ b/torch/csrc/api/include/torch/nn/options/conv.h
@@ -196,7 +196,7 @@ struct ConvFuncOptions {
   using padding_t = torch::nn::detail::conv_padding_t<D>;

   /// optional bias of shape `(out_channels)`. Default: ``None``
-  TORCH_ARG(torch::Tensor, bias);
+  TORCH_ARG(torch::Tensor, bias) = Tensor();

   /// The stride of the convolving kernel.
   /// For a `D`-dim convolution, must be a single number or a list of `D`
@@ -352,7 +352,7 @@ namespace functional {
 template <size_t D>
 struct ConvTransposeFuncOptions {
   /// optional bias of shape `(out_channels)`. Default: ``None``
-  TORCH_ARG(torch::Tensor, bias);
+  TORCH_ARG(torch::Tensor, bias) = Tensor();

   /// The stride of the convolving kernel.
   /// For a `D`-dim convolution, must be a single number or a list of `D`
diff --git a/torch/csrc/api/include/torch/nn/options/embedding.h b/torch/csrc/api/include/torch/nn/options/embedding.h
index 3c62b2a06852..be689f12b3bd 100644
--- a/torch/csrc/api/include/torch/nn/options/embedding.h
+++ b/torch/csrc/api/include/torch/nn/options/embedding.h
@@ -40,7 +40,7 @@ struct TORCH_API EmbeddingOptions {
   TORCH_ARG(bool, sparse) = false;
   /// The learnable weights of the module of shape (num_embeddings,
   /// embedding_dim)
-  TORCH_ARG(torch::Tensor, _weight);
+  TORCH_ARG(torch::Tensor, _weight) = Tensor();
 };

 // ============================================================================
@@ -136,7 +136,7 @@ struct TORCH_API EmbeddingBagOptions {
   TORCH_ARG(bool, sparse) = false;
   /// The learnable weights of the module of shape (num_embeddings,
   /// embedding_dim)
-  TORCH_ARG(torch::Tensor, _weight);
+  TORCH_ARG(torch::Tensor, _weight) = Tensor();
   /// If ``true``, `offsets` has one additional element, where the last element
   /// is equivalent to the size of `indices`. This matches the CSR format.
   TORCH_ARG(bool, include_last_offset) = false;
@@ -201,7 +201,7 @@ namespace functional {
 struct TORCH_API EmbeddingBagFuncOptions {
   /// Only used when `input` is 1D. `offsets` determines
   /// the starting index position of each bag (sequence) in `input`.
-  TORCH_ARG(torch::Tensor, offsets);
+  TORCH_ARG(torch::Tensor, offsets) = Tensor();
   /// If given, each embedding vector with norm larger than `max_norm` is
   /// renormalized to have norm `max_norm`.
   TORCH_ARG(std::optional<double>, max_norm) = std::nullopt;
@@ -223,7 +223,7 @@ struct TORCH_API EmbeddingBagFuncOptions {
   /// be taken to be 1. If specified, `per_sample_weights` must have exactly the
   /// same shape as input and is treated as having the same `offsets`, if those
   /// are not None.
-  TORCH_ARG(torch::Tensor, per_sample_weights);
+  TORCH_ARG(torch::Tensor, per_sample_weights) = Tensor();
   /// If ``true``, `offsets` has one additional element, where the last element
   /// is equivalent to the size of `indices`. This matches the CSR format. Note:
   /// this option is currently only supported when ``mode="sum"``.
diff --git a/torch/csrc/api/include/torch/nn/options/instancenorm.h b/torch/csrc/api/include/torch/nn/options/instancenorm.h
index c37832407edf..2c90a060340b 100644
--- a/torch/csrc/api/include/torch/nn/options/instancenorm.h
+++ b/torch/csrc/api/include/torch/nn/options/instancenorm.h
@@ -67,13 +67,13 @@ namespace functional {
 /// F::InstanceNormFuncOptions().running_mean(mean).running_var(variance).weight(weight).bias(bias).momentum(0.1).eps(1e-5));
 /// ```
 struct TORCH_API InstanceNormFuncOptions {
-  TORCH_ARG(Tensor, running_mean);
+  TORCH_ARG(Tensor, running_mean) = Tensor();

-  TORCH_ARG(Tensor, running_var);
+  TORCH_ARG(Tensor, running_var) = Tensor();

-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = Tensor();

-  TORCH_ARG(Tensor, bias);
+  TORCH_ARG(Tensor, bias) = Tensor();

   TORCH_ARG(bool, use_input_stats) = true;
diff --git a/torch/csrc/api/include/torch/nn/options/loss.h b/torch/csrc/api/include/torch/nn/options/loss.h
index b004fae8cdb0..88d954c5e18b 100644
--- a/torch/csrc/api/include/torch/nn/options/loss.h
+++ b/torch/csrc/api/include/torch/nn/options/loss.h
@@ -131,7 +131,7 @@ struct TORCH_API BCELossOptions {
       reduction_t;

   /// A manual rescaling weight given to the loss of each batch element.
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};

   /// Specifies the reduction to apply to the output.
   /// ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'``
   TORCH_ARG(reduction_t, reduction) = torch::kMean;
@@ -207,7 +207,7 @@ struct TORCH_API MultiMarginLossOptions {
   /// A manual rescaling weight given to each
   /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
   /// treated as if having all ones.
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = Tensor();
   /// Specifies the reduction to apply to the output:
   /// ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be
   /// applied,
@@ -365,7 +365,7 @@ struct TORCH_API MultiLabelSoftMarginLossOptions {
   /// A manual rescaling weight given to each
   /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
   /// treated as if having all ones.
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = Tensor();

   /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'.
   /// 'none': no reduction will be applied, 'mean': the sum of the output will
@@ -697,7 +697,7 @@ struct TORCH_API NLLLossOptions {
   /// A manual rescaling weight given to each
   /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is
   /// treated as if having all ones.
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};
   /// Specifies a target value that is ignored
   /// and does not contribute to the input gradient.
   TORCH_ARG(int64_t, ignore_index) = -100;
@@ -735,7 +735,7 @@ struct TORCH_API CrossEntropyLossOptions {
   /// A manual rescaling weight given to each class. If given, has to be a
   /// Tensor of size C
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};
   /// Specifies a target value that is ignored
   /// and does not contribute to the input gradient.
   TORCH_ARG(int64_t, ignore_index) = -100;
@@ -774,12 +774,12 @@ struct TORCH_API BCEWithLogitsLossOptions {
       reduction_t;
   /// A manual rescaling weight given to the loss of each batch element.
   /// If given, has to be a Tensor of size `nbatch`.
-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};
   /// Specifies the reduction to apply to the output. Default: Mean
   TORCH_ARG(reduction_t, reduction) = torch::kMean;
   /// A weight of positive examples.
   /// Must be a vector with length equal to the number of classes.
-  TORCH_ARG(Tensor, pos_weight);
+  TORCH_ARG(Tensor, pos_weight) = {};
 };

 namespace functional {
diff --git a/torch/csrc/api/include/torch/nn/options/normalization.h b/torch/csrc/api/include/torch/nn/options/normalization.h
index 439f8b2a9808..6097a2923af2 100644
--- a/torch/csrc/api/include/torch/nn/options/normalization.h
+++ b/torch/csrc/api/include/torch/nn/options/normalization.h
@@ -43,9 +43,9 @@ struct TORCH_API LayerNormFuncOptions {
   /// input shape from an expected input.
   TORCH_ARG(std::vector<int64_t>, normalized_shape);

-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};

-  TORCH_ARG(Tensor, bias);
+  TORCH_ARG(Tensor, bias) = {};

   /// a value added to the denominator for numerical stability. ``Default:
   /// 1e-5``.
@@ -177,9 +177,9 @@ struct TORCH_API GroupNormFuncOptions {
   /// number of groups to separate the channels into
   TORCH_ARG(int64_t, num_groups);

-  TORCH_ARG(Tensor, weight);
+  TORCH_ARG(Tensor, weight) = {};

-  TORCH_ARG(Tensor, bias);
+  TORCH_ARG(Tensor, bias) = {};

   /// a value added to the denominator for numerical stability. Default: 1e-5
   TORCH_ARG(double, eps) = 1e-5;
diff --git a/torch/csrc/api/include/torch/nn/options/pooling.h b/torch/csrc/api/include/torch/nn/options/pooling.h
index 4449a16f2206..3934f326c8a5 100644
--- a/torch/csrc/api/include/torch/nn/options/pooling.h
+++ b/torch/csrc/api/include/torch/nn/options/pooling.h
@@ -456,7 +456,7 @@ struct FractionalMaxPoolOptions {
   using ExpandingArrayDouble = torch::ExpandingArray<D, double>;
   TORCH_ARG(std::optional<ExpandingArrayDouble>, output_ratio) = std::nullopt;

-  TORCH_ARG(torch::Tensor, _random_samples);
+  TORCH_ARG(torch::Tensor, _random_samples) = Tensor();
 };

 /// `FractionalMaxPoolOptions` specialized for the `FractionalMaxPool2d` module.
diff --git a/torch/csrc/api/include/torch/optim/adam.h b/torch/csrc/api/include/torch/optim/adam.h
index c75aac357717..6c06e4030cf4 100644
--- a/torch/csrc/api/include/torch/optim/adam.h
+++ b/torch/csrc/api/include/torch/optim/adam.h
@@ -38,7 +38,7 @@ struct TORCH_API AdamParamState
   TORCH_ARG(int64_t, step) = 0;
   TORCH_ARG(torch::Tensor, exp_avg);
   TORCH_ARG(torch::Tensor, exp_avg_sq);
-  TORCH_ARG(torch::Tensor, max_exp_avg_sq);
+  TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {};

  public:
   void serialize(torch::serialize::InputArchive& archive) override;
diff --git a/torch/csrc/api/include/torch/optim/adamw.h b/torch/csrc/api/include/torch/optim/adamw.h
index 8b8c4c45f406..d656921a719d 100644
--- a/torch/csrc/api/include/torch/optim/adamw.h
+++ b/torch/csrc/api/include/torch/optim/adamw.h
@@ -38,7 +38,7 @@ struct TORCH_API AdamWParamState
   TORCH_ARG(int64_t, step) = 0;
   TORCH_ARG(torch::Tensor, exp_avg);
   TORCH_ARG(torch::Tensor, exp_avg_sq);
-  TORCH_ARG(torch::Tensor, max_exp_avg_sq);
+  TORCH_ARG(torch::Tensor, max_exp_avg_sq) = {};

  public:
   void serialize(torch::serialize::InputArchive& archive) override;
diff --git a/torch/csrc/api/include/torch/optim/lbfgs.h b/torch/csrc/api/include/torch/optim/lbfgs.h
index dc90113955fd..3d5f1832cf60 100644
--- a/torch/csrc/api/include/torch/optim/lbfgs.h
+++ b/torch/csrc/api/include/torch/optim/lbfgs.h
@@ -39,9 +39,9 @@ struct TORCH_API LBFGSParamState
   TORCH_ARG(int64_t, n_iter) = 0;
   TORCH_ARG(double, t) = 0;
   TORCH_ARG(double, prev_loss) = 0;
-  TORCH_ARG(Tensor, d);
-  TORCH_ARG(Tensor, H_diag);
-  TORCH_ARG(Tensor, prev_flat_grad);
+  TORCH_ARG(Tensor, d) = {};
+  TORCH_ARG(Tensor, H_diag) = {};
+  TORCH_ARG(Tensor, prev_flat_grad) = {};
   TORCH_ARG(std::deque<Tensor>, old_dirs);
   TORCH_ARG(std::deque<Tensor>, old_stps);
   TORCH_ARG(std::deque<Tensor>, ro);
diff --git a/torch/csrc/api/include/torch/optim/rmsprop.h b/torch/csrc/api/include/torch/optim/rmsprop.h
index c6581b87a4b6..7b6b9dea5649 100644
--- a/torch/csrc/api/include/torch/optim/rmsprop.h
+++ b/torch/csrc/api/include/torch/optim/rmsprop.h
@@ -43,8 +43,8 @@ struct TORCH_API RMSpropParamState
     : public OptimizerCloneableParamState<RMSpropParamState> {
   TORCH_ARG(int64_t, step) = 0;
   TORCH_ARG(torch::Tensor, square_avg);
-  TORCH_ARG(torch::Tensor, momentum_buffer);
-  TORCH_ARG(torch::Tensor, grad_avg);
+  TORCH_ARG(torch::Tensor, momentum_buffer) = {};
+  TORCH_ARG(torch::Tensor, grad_avg) = {};

  public:
   void serialize(torch::serialize::InputArchive& archive) override;
diff --git a/torch/csrc/autograd/graph_task.h b/torch/csrc/autograd/graph_task.h
index b34d15c7d05c..018beaffdaaf 100644
--- a/torch/csrc/autograd/graph_task.h
+++ b/torch/csrc/autograd/graph_task.h
@@ -122,7 +122,7 @@ struct GraphTask : std::enable_shared_from_this<GraphTask> {
   // Note: this field is not ready to be used until the proper
   // `thread_locals_.set_grad_mode()` call in the constructor.
-  at::ThreadLocalState thread_locals_;
+  at::ThreadLocalState thread_locals_ = at::ThreadLocalState();

   std::unordered_set<c10::Stream> leaf_streams;
diff --git a/torch/csrc/autograd/utils/lambda_post_hook.h b/torch/csrc/autograd/utils/lambda_post_hook.h
index 5f0f5626a4ad..e43d7a23876d 100644
--- a/torch/csrc/autograd/utils/lambda_post_hook.h
+++ b/torch/csrc/autograd/utils/lambda_post_hook.h
@@ -36,7 +36,7 @@ class LambdaPostHook : public torch::autograd::FunctionPostHook {

 protected:
   std::function<variable_list(const variable_list&, const variable_list&)> fn_;
-  compiled_fn_type compiled_fn_;
+  compiled_fn_type compiled_fn_{};
 };

 } // namespace torch::autograd::utils
diff --git a/torch/csrc/distributed/c10d/FlightRecorder.hpp b/torch/csrc/distributed/c10d/FlightRecorder.hpp
index a04a597b5ccb..b0974495a87a 100644
--- a/torch/csrc/distributed/c10d/FlightRecorder.hpp
+++ b/torch/csrc/distributed/c10d/FlightRecorder.hpp
@@ -176,9 +176,9 @@ struct FlightRecorder {
   size_t max_entries_ = 0;
   size_t next_ = 0;
   size_t id_ = 0;
-  std::map<size_t, std::shared_ptr<ProcessGroupStatus>> all_pg_status_;
+  std::map<size_t, std::shared_ptr<ProcessGroupStatus>> all_pg_status_ = {};
   std::map<std::tuple<std::string, std::string>, std::vector<uint64_t>>
-      pg_name_to_ranks_;
+      pg_name_to_ranks_ = {};
   std::string comm_lib_version_;

   std::optional<size_t> record(
diff --git a/torch/csrc/distributed/c10d/NCCLUtils.hpp b/torch/csrc/distributed/c10d/NCCLUtils.hpp
index fdd50f69ef3d..fcd55b6a655e 100644
--- a/torch/csrc/distributed/c10d/NCCLUtils.hpp
+++ b/torch/csrc/distributed/c10d/NCCLUtils.hpp
@@ -367,7 +367,7 @@ class NCCLComm {
   int rank_{};
   // Optional reason for communicator failure, provided by ProcessGroupNCCL for
   // better error messaging.
-  std::optional<std::string> commFailureReason_;
+  std::optional<std::string> commFailureReason_{};
   bool initialized_{false};
   // Whether this communicator is using nonblocking mode. Recorded during comm
   // creation or split. For safety, we give a default value of true (more
diff --git a/torch/csrc/distributed/c10d/ParamCommsUtils.hpp b/torch/csrc/distributed/c10d/ParamCommsUtils.hpp
index 678c98e91a0b..d011b0e42ed1 100644
--- a/torch/csrc/distributed/c10d/ParamCommsUtils.hpp
+++ b/torch/csrc/distributed/c10d/ParamCommsUtils.hpp
@@ -91,7 +91,7 @@ class TORCH_API ParamCommsDebugInfo : public c10::DebugInfoBase {
   std::vector<int64_t> outputSplitSizes_;
   int globalRankStart_{};
   int globalRankStride_{};
-  std::vector<int64_t> groupRanks_;
+  std::vector<int64_t> groupRanks_{};
 };

 #define RECORD_PARAM_COMMS( \
diff --git a/torch/csrc/distributed/c10d/ProcessGroup.hpp b/torch/csrc/distributed/c10d/ProcessGroup.hpp
index 8c4a657fd7ee..5a06a386d5ca 100644
--- a/torch/csrc/distributed/c10d/ProcessGroup.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroup.hpp
@@ -11,6 +11,7 @@

 #include
 #include
+#include

 // *************************************************************************
 // PROCESS GROUP collective communication API IS BEING CHANGED BETWEEN
 // versions 1.7 and 1.8.
diff --git a/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp b/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp
index 07f0e26c2da9..442cb490743b 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp
@@ -284,7 +284,7 @@ class AsyncAllreduceWork : public ProcessGroupGloo::AsyncWork {
         reduceOp(std::move(reduceOp)),
         tag(tag) {}

-  std::vector<at::Tensor> inputs;
+  std::vector<at::Tensor> inputs{};
   const ReduceOp reduceOp;
   const uint32_t tag;
@@ -399,7 +399,7 @@ class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork {
         inputs(inputs),
         tag(tag) {}

-  std::vector<at::Tensor> inputs;
+  std::vector<at::Tensor> inputs{};
   const uint32_t tag;

   // We share dimensionality about the sparse tensors before collecting
diff --git a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp
index bff49938791d..3386d8ee0a66 100644
--- a/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp
+++ b/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp
@@ -732,7 +732,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   std::condition_variable workMetaListCV_;

   // Heartbeat of watchdog thread.
-  std::atomic_uint64_t heartbeat_;
+  std::atomic_uint64_t heartbeat_{};

   // Whether or not to propagate detected errors to all ranks in the same PG
   // through TCPStore.
@@ -1319,7 +1319,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   int traceBufferSize_;

   // We gate the cudaEventCache so that we can roll it out gradually.
-  std::atomic<bool> cudaEventCacheEnabled_;
+  std::atomic<bool> cudaEventCacheEnabled_{};

   std::thread onCompletionHookThread_;

@@ -1327,7 +1327,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   std::atomic<bool> terminateProcessGroup_;

   // Whether there are hooks pending to be fired
-  std::atomic<bool> hasPendingHooks_;
+  std::atomic<bool> hasPendingHooks_{};

   // This is the signal from watchdog threads to indicate whether the monitor
   // thread should dump. Making it static so that it is accessible from all the
@@ -1416,11 +1416,11 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   // Whether or not to create start CUDAEvent and enable timing for start
   // and end events. Note that enableTiming_ is always true if desyncDebug_
   // is set to true.
-  std::atomic<bool> enableTiming_;
+  std::atomic<bool> enableTiming_{};

   // Flag to enable the print of hash value of input/output of collectives for
   // verification.
-  std::atomic<bool> enableCollectiveHashDebug_;
+  std::atomic<bool> enableCollectiveHashDebug_{};

   // Whether or not TORCH_NCCL_AVOID_RECORD_STREAMS was set
   bool avoidRecordStreams_ = false;
diff --git a/torch/csrc/distributed/c10d/TCPStore.hpp b/torch/csrc/distributed/c10d/TCPStore.hpp
index 2caab088a609..75561cf597ae 100644
--- a/torch/csrc/distributed/c10d/TCPStore.hpp
+++ b/torch/csrc/distributed/c10d/TCPStore.hpp
@@ -41,7 +41,7 @@ class TCPServer;
 class TCPClient;

 struct SocketAddress {
-  std::string host;
+  std::string host{};
   std::uint16_t port{};
 };
diff --git a/torch/csrc/distributed/c10d/TCPStoreBackend.hpp b/torch/csrc/distributed/c10d/TCPStoreBackend.hpp
index d5f7f0248bba..3eb148f2bef8 100644
--- a/torch/csrc/distributed/c10d/TCPStoreBackend.hpp
+++ b/torch/csrc/distributed/c10d/TCPStoreBackend.hpp
@@ -65,7 +65,7 @@ class BackgroundThread {

  private:
   std::atomic<bool> is_running_{false};
-  std::thread daemonThread_;
+  std::thread daemonThread_{};
 };

 std::unique_ptr<BackgroundThread> create_tcpstore_backend(
diff --git a/torch/csrc/distributed/c10d/Utils.hpp b/torch/csrc/distributed/c10d/Utils.hpp
index ff68af5b2b5d..c7a2e3523ae4 100644
--- a/torch/csrc/distributed/c10d/Utils.hpp
+++ b/torch/csrc/distributed/c10d/Utils.hpp
@@ -437,7 +437,7 @@ inline at::Tensor newLikeFlat(
   }
   at::DeviceGuard gpuGuard(device);
   std::vector<int64_t> sizes{static_cast<int64_t>(tensors[deviceIdx].size())};
-  std::vector<int64_t> strides{t.numel()};
+  std::vector<int64_t> strides{static_cast<int64_t>(t.numel())};
   sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end());
   strides.insert(strides.end(), t.strides().begin(), t.strides().end());
   return at::empty_strided(
diff --git a/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp b/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp
index 6aefca8eabd3..7d3eb5038565 100644
--- a/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp
+++ b/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp
@@ -62,7 +62,7 @@ class TORCH_API StoreCollectives : public ControlCollectives {
   int rank_;
   int worldSize_;

-  c10::FastSet<std::string> seenKeys_;
+  c10::FastSet<std::string> seenKeys_{};
 };
 } // namespace c10d
diff --git a/torch/csrc/distributed/c10d/reducer.hpp b/torch/csrc/distributed/c10d/reducer.hpp
index 4e5ed6a9a5c3..6707975d38ac 100644
--- a/torch/csrc/distributed/c10d/reducer.hpp
+++ b/torch/csrc/distributed/c10d/reducer.hpp
@@ -26,8 +26,8 @@

 namespace c10d {

-constexpr int kDefaultFirstBucketBytes = 1024 * 1024;
-constexpr int kDefaultBucketBytesCap = 25 * 1024 * 1024;
+constexpr int kDefaultFirstBucketBytes = int(1024 * 1024);
+constexpr int kDefaultBucketBytesCap = int(25 * 1024 * 1024);
 // Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations.
 constexpr int kDDPRuntimeLoggingSampleRate = 100;
diff --git a/torch/csrc/inductor/aoti_runtime/utils.h b/torch/csrc/inductor/aoti_runtime/utils.h
index 49255a858d4d..4eee0b09a83a 100644
--- a/torch/csrc/inductor/aoti_runtime/utils.h
+++ b/torch/csrc/inductor/aoti_runtime/utils.h
@@ -244,7 +244,7 @@ class RAIIC10IValueHandle {

 class MaybeOwningAtenTensorHandle {
  public:
-  MaybeOwningAtenTensorHandle() : handle_(nullptr) {}
+  MaybeOwningAtenTensorHandle() : handle_(nullptr), raii_handle_() {}

   // We skip copy constructor as MaybeOwningAtenTensorHandle might be RAII which
   // makes it undefined.
   MaybeOwningAtenTensorHandle(const MaybeOwningAtenTensorHandle& other) =
diff --git a/torch/csrc/jit/frontend/function_schema_parser.cpp b/torch/csrc/jit/frontend/function_schema_parser.cpp
index 24b3adfd98cd..4c824e6997bf 100644
--- a/torch/csrc/jit/frontend/function_schema_parser.cpp
+++ b/torch/csrc/jit/frontend/function_schema_parser.cpp
@@ -111,7 +111,7 @@ struct SchemaParser {
       L.expect(':');
       name = fmt::format("{}::{}", name, L.expect(TK_IDENT).text_view());
     }
-    std::string overload_name;
+    std::string overload_name = "";
     if (L.nextIf('.')) {
       overload_name = L.expect(TK_IDENT).text();
     }
diff --git a/torch/csrc/jit/frontend/lexer.h b/torch/csrc/jit/frontend/lexer.h
index 98c235bc24f1..0faf6ff24da4 100644
--- a/torch/csrc/jit/frontend/lexer.h
+++ b/torch/csrc/jit/frontend/lexer.h
@@ -412,7 +412,11 @@ struct Token {
 struct Lexer {
   explicit Lexer(std::shared_ptr<Source> source)
-      : source(std::move(source)), shared(sharedParserData()) {
+      : source(std::move(source)),
+
+        indent_stack(),
+        next_tokens(),
+        shared(sharedParserData()) {
     auto first_indent = lexRaw(true);
     indent_stack.push_back(first_indent.range.size());
     lex();
diff --git a/torch/csrc/jit/frontend/sugared_value.cpp b/torch/csrc/jit/frontend/sugared_value.cpp
index f9a80cf4da5e..0e9f0c9c2178 100644
--- a/torch/csrc/jit/frontend/sugared_value.cpp
+++ b/torch/csrc/jit/frontend/sugared_value.cpp
@@ -867,7 +867,7 @@ std::shared_ptr<SugaredValue> TorchCheckValue::call(
     }
   }

-  if (!args.empty()) {
+  if (args.size() >= 1) {
     if (found_cond_kwarg) {
       throw(
           ErrorReport(loc)
diff --git a/torch/csrc/jit/ir/ir.cpp b/torch/csrc/jit/ir/ir.cpp
index d7aaac509590..36483b928918 100644
--- a/torch/csrc/jit/ir/ir.cpp
+++ b/torch/csrc/jit/ir/ir.cpp
@@ -1769,7 +1769,7 @@ Node* Graph::createTupleSlice(

   int64_t i = beg;
   for ([[maybe_unused]] const auto j : c10::irange(num_values)) {
-    auto idx = insertConstant(IValue(i));
+    auto idx = insertConstant(IValue(static_cast<int64_t>(i)));
     auto tupleIndex = insertNode(createTupleIndex(tup, idx, tt->elements()[i]));
     new_vals.push_back(tupleIndex->output());
diff --git a/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp b/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp
index 23752d5f041c..8d847ddeb533 100644
--- a/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp
+++ b/torch/csrc/jit/mobile/compatibility/model_compatibility.cpp
@@ -16,6 +16,10 @@
 #include
 #include

+namespace c10 {
+TypePtr parseType(const std::string& pythonStr);
+} // namespace c10
+
 namespace torch::jit {

 using caffe2::serialize::FileAdapter;
@@ -63,7 +67,8 @@ std::vector<c10::IValue> get_bytecode_ivalues(PyTorchStreamReader& reader) {

 /********************** Bytecode **********************/

 // Forward declare
-
+uint64_t _get_model_bytecode_version(
+    const std::vector<IValue>& bytecode_ivalues);
 static uint64_t _get_model_bytecode_version_from_bytes(char* data, size_t size);

 uint64_t _get_model_bytecode_version(std::istream& in) {
@@ -245,6 +250,8 @@ std::unordered_map<std::string, OperatorInfo> _get_model_ops_and_info(

 /********************** Get Type Table **********************/

 // Forward declare
+std::unordered_set<std::string> _get_mobile_model_contained_types(
+    const std::vector<IValue>& bytecode_ivalues);

 std::unordered_set<std::string> _get_mobile_model_contained_types(
     std::istream& in) {
diff --git a/torch/csrc/jit/mobile/compatibility/model_compatibility.h b/torch/csrc/jit/mobile/compatibility/model_compatibility.h
index 03be3dbeb1c6..59ae2b1f23a4 100644
--- a/torch/csrc/jit/mobile/compatibility/model_compatibility.h
+++ b/torch/csrc/jit/mobile/compatibility/model_compatibility.h
@@ -93,7 +93,7 @@ enum ModelCompatibilityStatus {

 struct ModelCompatCheckResult {
   ModelCompatibilityStatus status;
-  std::vector<std::string> errors;
+  std::vector<std::string> errors{};
 };
 // Takes in information about a runtime and a model and returns if the two are
 // compatible with one another.
diff --git a/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp b/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp
index 35aeb435330e..c3c86a7d2698 100644
--- a/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp
+++ b/torch/csrc/jit/mobile/compatibility/runtime_compatibility.cpp
@@ -7,6 +7,10 @@
 #include
 #include

+namespace c10 {
+TypePtr parseType(const std::string& pythonStr);
+} // namespace c10
+
 namespace torch::jit {

 uint64_t _get_runtime_bytecode_version() {
diff --git a/torch/csrc/jit/mobile/flatbuffer_loader.h b/torch/csrc/jit/mobile/flatbuffer_loader.h
index b34bb8809380..24c670e01f79 100644
--- a/torch/csrc/jit/mobile/flatbuffer_loader.h
+++ b/torch/csrc/jit/mobile/flatbuffer_loader.h
@@ -121,6 +121,13 @@ TORCH_API mobile::Module parse_flatbuffer_no_object(
     size_t size,
     std::optional<at::Device> device);

+TORCH_API mobile::Module parse_and_initialize_mobile_module(
+    void* data,
+    size_t,
+    std::optional<at::Device>,
+    ExtraFilesMap* extra_files,
+    bool should_copy_tensor_memory);
+
 // no op, TODO(qihan) delete
 TORCH_API bool register_flatbuffer_loader();
diff --git a/torch/csrc/jit/mobile/import.cpp b/torch/csrc/jit/mobile/import.cpp
index 6a0ba7e038ea..089a0df564a0 100644
--- a/torch/csrc/jit/mobile/import.cpp
+++ b/torch/csrc/jit/mobile/import.cpp
@@ -87,6 +87,8 @@ using caffe2::serialize::MemoryReadAdapter;
 using caffe2::serialize::PyTorchStreamReader;
 using caffe2::serialize::ReadAdapterInterface;

+OpCode parseOpCode(const char* str);
+
 TypePtr resolveTypeNameMobile(
     const c10::QualifiedName& qn,
     const std::shared_ptr<CompilationUnit>& compilation_unit) {
@@ -214,7 +216,7 @@ class BytecodeDeserializer final {
       mobile::Function* function);
   std::shared_ptr<CompilationUnit> compilation_unit_;
   std::unordered_set<std::string> imported_libs_;
-  std::unique_ptr<PyTorchStreamReader> reader_;
+  std::unique_ptr<PyTorchStreamReader> reader_{};
   std::optional<at::Device> device_;
   uint64_t module_load_options_;
   // From `version` or `.data/version` in model.ptl and it's compute
diff --git a/torch/csrc/jit/mobile/interpreter.cpp b/torch/csrc/jit/mobile/interpreter.cpp
index b5e67cd83cbb..c2197fcdcb35 100644
--- a/torch/csrc/jit/mobile/interpreter.cpp
+++ b/torch/csrc/jit/mobile/interpreter.cpp
@@ -17,7 +17,7 @@
 #include

 namespace torch::jit {
-
+std::ostream& operator<<(std::ostream& out, Instruction inst);
 namespace mobile {
 InterpreterState::InterpreterState(const Code& code) {
   enterFrame(code);
diff --git a/torch/csrc/jit/mobile/parse_bytecode.cpp b/torch/csrc/jit/mobile/parse_bytecode.cpp
index 1a1e278e371f..eb95976d451b 100644
--- a/torch/csrc/jit/mobile/parse_bytecode.cpp
+++ b/torch/csrc/jit/mobile/parse_bytecode.cpp
@@ -9,7 +9,7 @@
 #include

 namespace torch::jit {
-
+OpCode parseOpCode(const char* str);
 using c10::IValue;

 IValue expect_field(
diff --git a/torch/csrc/jit/mobile/train/optim/sgd.cpp b/torch/csrc/jit/mobile/train/optim/sgd.cpp
index 1523c5629a9c..ae1a40e10621 100644
--- a/torch/csrc/jit/mobile/train/optim/sgd.cpp
+++ b/torch/csrc/jit/mobile/train/optim/sgd.cpp
@@ -84,7 +84,7 @@ Tensor SGD::step(const LossClosure& closure) {
     loss = closure();
   }
   for (auto& group : param_groups_) {
-    auto& options = group.options();
+    auto& options = static_cast<SGDOptions&>(group.options());
     auto weight_decay = options.weight_decay();
     auto momentum = options.momentum();
     auto dampening = options.dampening();
diff --git a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
index af3a0d641016..680f7683009c 100644
--- a/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
+++ b/torch/csrc/jit/passes/frozen_ops_to_mkldnn.cpp
@@ -272,7 +272,8 @@ Operation createUnaryOp(
     TORCH_INTERNAL_ASSERT(
         a_it.get_desc().get_size() % elementSize(a.scalar_type()) == 0);

-    auto out_aten = at::from_blob(out_raw_data, nelem, a_options_with_strided);
+    auto out_aten = at::from_blob(
+        out_raw_data, {static_cast<int64_t>(nelem)}, a_options_with_strided);
     aten_op(out_aten, in_aten);
     push(stack, out);
   };
diff --git a/torch/csrc/jit/passes/onnx/helper.h b/torch/csrc/jit/passes/onnx/helper.h
index cad60e8816d3..09b31576998a 100644
--- a/torch/csrc/jit/passes/onnx/helper.h
+++ b/torch/csrc/jit/passes/onnx/helper.h
@@ -28,6 +28,9 @@ TORCH_API ValueToParamPairMap
     buildValueToParamsMap(Block* b, const ParamMap& paramsDict);
 TORCH_API void eraseUnusedValuesFromMap(ValueToParamPairMap& valsToParamsMap);
 TORCH_API void eraseUnusedBlockInputs(Block* b);
+TORCH_API void buildParamsMapFromValueToParamsMap(
+    const ValueToParamPairMap& valsToParamsMap,
+    ParamMap& paramsDict);

 TORCH_API Node* addNodeToBlock(
     Block* block,
diff --git a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
index ccb6e0bc163a..452b18f3efc3 100644
--- a/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
+++ b/torch/csrc/jit/passes/onnx/shape_type_inference.cpp
@@ -1439,8 +1439,8 @@ void ComputeConstant(Node* n, int opset_version) {
           for (auto cur_dim : shape_vector_0) {
             num_elements *= cur_dim.static_size();
           }
-          dims.emplace_back(
-              c10::ShapeSymbol::fromStaticSize(num_elements));
+          dims.emplace_back(c10::ShapeSymbol::fromStaticSize(
+              static_cast<int64_t>(num_elements)));
         }
       }
     }
diff --git a/torch/csrc/jit/passes/quantization/quantization_patterns.h b/torch/csrc/jit/passes/quantization/quantization_patterns.h
index e30688ed6e21..86d7b5857c49 100644
--- a/torch/csrc/jit/passes/quantization/quantization_patterns.h
+++ b/torch/csrc/jit/passes/quantization/quantization_patterns.h
@@ -16,7 +16,7 @@ struct QuantFusionInfo {
   std::string quantized_op_name;
   std::string pattern;
   std::string replacement;
-  std::vector<MatchFilter> filters;
+  std::vector<MatchFilter> filters = {};
 };

 namespace {
diff --git a/torch/csrc/jit/passes/tensorexpr_fuser.cpp b/torch/csrc/jit/passes/tensorexpr_fuser.cpp
index 672a9949c6b9..bb052fc8421f 100644
--- a/torch/csrc/jit/passes/tensorexpr_fuser.cpp
+++ b/torch/csrc/jit/passes/tensorexpr_fuser.cpp
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/torch/csrc/jit/python/python_arg_flatten.cpp b/torch/csrc/jit/python/python_arg_flatten.cpp
index b71f21d043a3..655bbb5edac3 100644
--- a/torch/csrc/jit/python/python_arg_flatten.cpp
+++ b/torch/csrc/jit/python/python_arg_flatten.cpp
@@ -78,7 +78,8 @@ void flatten_rec(PyObject* obj, ParsedArgs& args) {
     args.desc.metadata.emplace_back(var);
     args.desc.structure.push_back(D::Bool);
   } else if (PyLong_Check(obj)) { // Wrap longs in Long tensors
-    at::Tensor var = scalar_to_tensor(at::Scalar(THPUtils_unpackLong(obj)));
+    at::Tensor var = scalar_to_tensor(
+        at::Scalar(static_cast<int64_t>(THPUtils_unpackLong(obj))));
     args.vars.push_back(var);
     args.desc.metadata.emplace_back(var);
     args.desc.structure.push_back(D::Long);
diff --git a/torch/csrc/jit/runtime/instruction.h b/torch/csrc/jit/runtime/instruction.h
index fbaca4b6ea78..73c78adbda03 100644
--- a/torch/csrc/jit/runtime/instruction.h
+++ b/torch/csrc/jit/runtime/instruction.h
@@ -95,5 +95,6 @@ std::ostream& operator<<(std::ostream& out, Instruction inst);
 bool isOpSupportedInMobile(OpCode op);
 char const* toString(OpCode op);
 OpCode parseOpCode(const char* str);
+std::ostream& operator<<(std::ostream& out, Instruction inst);

 } // namespace torch::jit
diff --git a/torch/csrc/jit/runtime/register_prim_ops.cpp b/torch/csrc/jit/runtime/register_prim_ops.cpp
index 4aa098d870f5..d59b93190e36 100644
--- a/torch/csrc/jit/runtime/register_prim_ops.cpp
+++ b/torch/csrc/jit/runtime/register_prim_ops.cpp
@@ -1710,7 +1710,7 @@ int64_t stringFindImpl(
     bool reverse = false) {
   int64_t size = string.size();
   if (start < 0) {
-    start = std::max(int64_t(0), size + start);
+    start = std::max(int64_t(0), int64_t(size + start));
   }
   if (end < 0) {
     end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1964,7 +1964,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
           return;
         }
         if (start < 0) {
-          start = std::max(int64_t(0), size + start);
+          start = std::max(int64_t(0), int64_t(size + start));
         }
         if (end < 0) {
           end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -1993,7 +1993,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
         std::string string = pop(stack).toStringRef();
         int64_t size = string.size();
         if (start < 0) {
-          start = std::max(int64_t(0), (size + start));
+          start = std::max(int64_t(0), int64_t(size + start));
         }
         if (end < 0) {
           end = std::max(int64_t(0), int64_t(size + end + 1));
@@ -2019,7 +2019,7 @@ static const std::vector<OperatorGeneratorArgs> stringOpGenArgs{
         std::string string = pop(stack).toStringRef();
         int64_t size = string.size();
         if (start < 0) {
-          start = std::max(int64_t(0), (size + start));
+          start = std::max(int64_t(0), int64_t(size + start));
         }
         if (end < 0) {
           end = std::max(int64_t(0), int64_t(size + end + 1));
diff --git a/torch/csrc/jit/runtime/static/impl.cpp b/torch/csrc/jit/runtime/static/impl.cpp
index 0a6e0b3564ad..78378b04b4a6 100644
--- a/torch/csrc/jit/runtime/static/impl.cpp
+++ b/torch/csrc/jit/runtime/static/impl.cpp
@@ -1098,7 +1098,7 @@ namespace {

 void destroyNodeOutputs(ProcessedNode& p_node) {
   const auto borrows_outputs = borrowsOutputs(p_node.node()->kind());
-  const auto num_outputs = p_node.num_outputs();
+  const auto num_outputs = static_cast<uint32_t>(p_node.num_outputs());
   for (const auto i : c10::irange(num_outputs)) {
     auto& output = p_node.Output(i);
     if (doesNotHeapAllocateWhenStoredInIValue(*output.type())) {
@@ -1863,7 +1863,7 @@ bool BlockRunner::check_for_memory_leak(
   const auto num_nodes = static_cast<uint32_t>(nodes_.size());
   for (const auto n : c10::irange(num_nodes)) {
     auto& pnode = nodes_[n];
-    const auto num_outputs = pnode.num_outputs();
+    const auto num_outputs = static_cast<uint32_t>(pnode.num_outputs());
     for (const auto i : c10::irange(num_outputs)) {
       const IValue* ival = &pnode.Output(i);
       const Value* val = pnode.node()->output(i);
@@ -1943,7 +1943,7 @@ bool BlockRunner::checkOutputTensorMemoryLeaks() {
   const auto num_nodes = static_cast<uint32_t>(nodes_.size());
   for (const auto n : c10::irange(num_nodes)) {
     auto& pnode = nodes_[n];
-    const auto num_outputs = pnode.num_outputs();
+    const auto num_outputs = static_cast<uint32_t>(pnode.num_outputs());
     for (const auto i : c10::irange(num_outputs)) {
       const IValue* ival = &pnode.Output(i);
       const Value* val = pnode.node()->output(i);
@@ -2042,7 +2042,7 @@ ProcessedFunction::ProcessedFunction(
         stack.emplace_back(static_cast<int64_t>(size));
       }
       node_op(stack);
-      const auto num_outputs = pnode->num_outputs();
+      const auto num_outputs = static_cast<uint32_t>(pnode->num_outputs());
       TORCH_DCHECK_EQ(stack.size(), num_outputs);
       for (const auto i : c10::irange(num_outputs)) {
         pnode->Output(i) = std::move(stack[i]);
@@ -2158,7 +2158,7 @@ bool ProcessedNode::verify_no_memory_overlap(bool force_check) const {
 }

 bool ProcessedNode::verify_outputs_dont_overlap_each_other() const {
-  const auto n_outputs = num_outputs();
+  const auto n_outputs = static_cast<uint32_t>(num_outputs());
   for (const auto i : c10::irange(n_outputs)) {
     if (!Output(i).isTensor()) {
       continue;
     }
@@ -2196,7 +2196,7 @@ bool ProcessedNode::verify_inputs_dont_overlap_outputs(bool force_check) const {
     return true;
   }
   const auto n_inputs = static_cast<uint32_t>(inputs_.size());
-  const auto n_outputs = num_outputs();
+  const auto n_outputs = static_cast<uint32_t>(num_outputs());
   for (const auto i : c10::irange(n_inputs)) {
     const IValue* in = &Input(i);
     if (!in->isTensor()) {
       continue;
     }
@@ -2235,7 +2235,7 @@ bool ProcessedNode::check_and_correct_overlap_with(

 void ProcessedNode::verify_and_correct_memory_overlap() {
   const auto n_inputs = static_cast<uint32_t>(inputs_.size());
-  const auto n_outputs = num_outputs();
+  const auto n_outputs = static_cast<uint32_t>(num_outputs());
   for (const auto i : c10::irange(n_inputs)) {
     const IValue& in = Input(i);
     if (!in.isTensor()) {
diff --git a/torch/csrc/jit/runtime/static/impl.h b/torch/csrc/jit/runtime/static/impl.h
index 24f8f01d7547..b25f63c939b0 100644
--- a/torch/csrc/jit/runtime/static/impl.h
+++ b/torch/csrc/jit/runtime/static/impl.h
@@ -142,9 +142,9 @@ class TORCH_API ManagedTensorRanges {

   // Maps Node* to the set of managed tensors that are now available
   // for reuse after this node.
-  c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_;
+  c10::FastMap<Node*, std::vector<const Value*>> node_to_newly_free_tensors_{};
   // Maps each Value* to its lifetime (start node index, end node index)
-  c10::FastMap<const Value*, Lifetime> value_lifetimes_;
+  c10::FastMap<const Value*, Lifetime> value_lifetimes_{};
 };

 struct TORCH_API StaticModuleOptions {
@@ -395,7 +395,7 @@ class BlockInfo {
   c10::FastSet<const Value*> managed_output_tensor_values_;
   c10::FastSet<const Value*> leaked_values_;

-  ManagedTensorRanges managed_tensor_ranges_;
+  ManagedTensorRanges managed_tensor_ranges_{};

   // The index of this block's inputs in the shared values_ array.
   const uint16_t input_idx_;
@@ -549,7 +549,7 @@ class TORCH_API StaticModule {
   // IValue table (defined by prim::Constant nodes)
   std::vector<IValue> constants_;
   // The functions to be called by corresponding ProcessedNode.
-  std::vector<ProcessedFunction> functions_;
+  std::vector<ProcessedFunction> functions_{};
   // A list of pre-processed nodes from which ProcessedNode are created per
   // StaticRuntime instance.
   std::vector<ProcessedNode> nodes_;
diff --git a/torch/csrc/jit/runtime/static/memory_planner.h b/torch/csrc/jit/runtime/static/memory_planner.h
index d9755d83048c..018b8947a07c 100644
--- a/torch/csrc/jit/runtime/static/memory_planner.h
+++ b/torch/csrc/jit/runtime/static/memory_planner.h
@@ -35,7 +35,7 @@ class StorageGroup {
   // allocated for all tensors in this storage group. Initially it
   // is zero, eventually it gets updated by the MemoryPlanner.
   size_t max_tensor_size_ = 0;
-  std::vector<at::Tensor*> group_;
+  std::vector<at::Tensor*> group_{};
 };

 // A contiguous buffer of `StorageImpl`s
@@ -263,7 +263,7 @@ class MemoryPlanner {
   // to an ordinary "strong reference" state.
   std::vector<IValue*> borrowed_ivalues_needing_incref_;

-  std::vector<std::pair<size_t, at::Tensor*>> managed_output_tensors_;
+  std::vector<std::pair<size_t, at::Tensor*>> managed_output_tensors_{};
   at::DataPtr buffer_; // allocated each time we call Run()
   uint8_t* buffer_start_{nullptr};
   uint8_t* buffer_end_{nullptr};
@@ -292,7 +292,7 @@ class StandardMemoryPlanner : public MemoryPlanner {
   void allocateManagedTensors() override;
   void deallocateManagedTensors() override;

-  std::vector<StorageGroup> managed_tensors_;
+  std::vector<StorageGroup> managed_tensors_{};
 };

 } // namespace torch::jit
diff --git a/torch/csrc/jit/serialization/export.cpp b/torch/csrc/jit/serialization/export.cpp
index 59ed5281db6b..6184889e5f10 100644
--- a/torch/csrc/jit/serialization/export.cpp
+++ b/torch/csrc/jit/serialization/export.cpp
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
diff --git a/torch/csrc/jit/serialization/export_module.cpp b/torch/csrc/jit/serialization/export_module.cpp
index 36c1804a06b7..e0ded27d375b 100644
--- a/torch/csrc/jit/serialization/export_module.cpp
+++ b/torch/csrc/jit/serialization/export_module.cpp
@@ -661,10 +661,10 @@ void ScriptModuleSerializer::writeByteCode(
   BackendDebugInfoRecorder debug_info_recorder;
   int64_t version_to_write = caffe2::serialize::kProducedBytecodeVersion;

-  elements.emplace_back(version_to_write);
+  elements.emplace_back(static_cast<int64_t>(version_to_write));
   std::vector<c10::IValue> debug_info_elements;
   // Always save debug handles
-  debug_info_elements.emplace_back(version_to_write);
+  debug_info_elements.emplace_back(static_cast<int64_t>(version_to_write));

   mobile::Module mobile_module =
       jitModuleToMobile(module, getOptionsFromGlobal());
@@ -913,7 +913,7 @@ void save_jit_module_to_write_func(
     const std::function<size_t(const void*, size_t)>& writer_func) {
   (void)save_mobile_debug_info;
   auto buffer = save_jit_module_to_bytes(module, extra_files);
-  writer_func(buffer->data(), buffer->size());
+  writer_func(reinterpret_cast<const void*>(buffer->data()), buffer->size());
 }

 void ExportModule(
diff --git a/torch/csrc/jit/serialization/import.h b/torch/csrc/jit/serialization/import.h
index aa7d457d2b2c..0e2024483f4a 100644
--- a/torch/csrc/jit/serialization/import.h
+++ b/torch/csrc/jit/serialization/import.h
@@ -140,6 +140,12 @@ TORCH_API Module load_jit_module_from_stream(
     ExtraFilesMap& extra_files,
     std::optional<at::Device> device = std::nullopt);

+TORCH_API Module parse_and_initialize_jit_module(
+    const std::shared_ptr<char>& data,
+    size_t size,
+    ExtraFilesMap& extra_files,
+    std::optional<at::Device> device);
+
 TORCH_API c10::intrusive_ptr<c10::ivalue::Object> ObjLoaderFunc(
     const at::StrongTypePtr& type,
     IValue input);
diff --git a/torch/csrc/jit/serialization/pickler_helper.h b/torch/csrc/jit/serialization/pickler_helper.h
index c074ab38c70a..b27d974a10e9 100644
--- a/torch/csrc/jit/serialization/pickler_helper.h
+++ b/torch/csrc/jit/serialization/pickler_helper.h
@@ -53,7 +53,7 @@ enum class PickleOpCode : char {
   BINFLOAT = 'G',

   // Protocol 2
-  PROTO = '\x80',
+  PROTO = char('\x80'),
   NEWOBJ = '\x81',
   EXT1 = '\x82',
   EXT2 = '\x83',
@@ -71,7 +71,7 @@ enum class PickleOpCode : char {
   SHORT_BINBYTES = 'C',

   // Protocol 4
-  SHORT_BINUNICODE = '\x8c',
+  SHORT_BINUNICODE = char('\x8c'),
   BINUNICODE8 = '\x8d',
   BINBYTES8 = '\x8e',
   EMPTY_SET = '\x8f',
diff --git a/torch/csrc/jit/serialization/source_range_serialization.cpp b/torch/csrc/jit/serialization/source_range_serialization.cpp
index caefafc6632e..b9a56bc87523 100644
--- a/torch/csrc/jit/serialization/source_range_serialization.cpp
+++ b/torch/csrc/jit/serialization/source_range_serialization.cpp
@@ -167,7 +167,9 @@ std::vector<char> SourceRangePickler::pickle(
     }
diff --git a/torch/csrc/jit/serialization/source_range_serialization.cpp b/torch/csrc/jit/serialization/source_range_serialization.cpp
index caefafc6632e..b9a56bc87523 100644
--- a/torch/csrc/jit/serialization/source_range_serialization.cpp
+++ b/torch/csrc/jit/serialization/source_range_serialization.cpp
@@ -167,7 +167,9 @@ std::vector SourceRangePickler::pickle(
     }
     ivalues.emplace_back(c10::ivalue::Tuple::create(
-        {(int64_t)range.bytes, srs->serialize(range.range), source_range_tag}));
+        {(int64_t)range.bytes,
+         srs->serialize(range.range),
+         static_cast(source_range_tag)}));
   }
 
   std::vector table;
diff --git a/torch/csrc/jit/serialization/unpickler.cpp b/torch/csrc/jit/serialization/unpickler.cpp
index e520dd0b45c0..9d23cf6d97c2 100644
--- a/torch/csrc/jit/serialization/unpickler.cpp
+++ b/torch/csrc/jit/serialization/unpickler.cpp
@@ -1063,10 +1063,10 @@ void Unpickler::rebuildRRef() {
   // const reference will extend the lifetime of the temporary variable
   const auto& rrefId = distributed::rpc::RRefId(
       static_cast(args.at(distributed::rpc::RREFID_ON_IDX).toInt()),
-      args.at(distributed::rpc::RREFID_ID_IDX).toInt());
+      static_cast(args.at(distributed::rpc::RREFID_ID_IDX).toInt()));
   const auto& forkId = distributed::rpc::RRefId(
       static_cast(args.at(distributed::rpc::FORKID_ON_IDX).toInt()),
-      args.at(distributed::rpc::FORKID_ID_IDX).toInt());
+      static_cast(args.at(distributed::rpc::FORKID_ID_IDX).toInt()));
   auto parent = static_cast(args.at(distributed::rpc::PARENT_IDX).toInt());
   const auto& typeStr = static_cast(
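The rebuildRRef hunk above pulls every id out of the unpickled argument tuple as an int64_t (via toInt()) and then narrows it to the constructor parameter's type; the restored static_casts make each narrowing conversion explicit at the call site instead of leaving it implicit. A standalone sketch of the same pattern (the id typedef and struct below are invented stand-ins, not the real distributed::rpc types):

    #include <cstdint>

    using worker_id_t = int16_t; // invented narrow id type, for illustration

    struct MiniRRefId {
      MiniRRefId(worker_id_t created_on, int64_t local_id)
          : created_on_(created_on), local_id_(local_id) {}
      worker_id_t created_on_;
      int64_t local_id_;
    };

    int main() {
      int64_t raw = 3; // what an unpickled integer field hands back
      // The cast documents the intentional narrowing from int64_t to int16_t.
      MiniRRefId id(static_cast<worker_id_t>(raw), 42);
      return id.created_on_;
    }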
diff --git a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
index 35b54acaa8c3..dbf9536ee227 100644
--- a/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
+++ b/torch/csrc/jit/tensorexpr/cuda_codegen.cpp
@@ -1082,7 +1082,8 @@ void CudaCodeGen::call_with_numel(void** args, int64_t numel) {
   // https://stackoverflow.com/questions/34388712/cannot-understand-how-jcuda-culaunchkernel-work
   std::vector ptr_to_args(buffer_args.size());
   for (size_t i = 0; i < buffer_args.size(); i++) {
-    ptr_to_args[i] = buffer_args[i].isVar() ? args[i] : (&args[i]);
+    ptr_to_args[i] =
+        buffer_args[i].isVar() ? args[i] : const_cast(&args[i]);
   }
 
   const auto device = this->device().index();
diff --git a/torch/csrc/jit/tensorexpr/eval.h b/torch/csrc/jit/tensorexpr/eval.h
index 2582ec5797dd..8cbc1689e0c9 100644
--- a/torch/csrc/jit/tensorexpr/eval.h
+++ b/torch/csrc/jit/tensorexpr/eval.h
@@ -127,7 +127,7 @@ To raw_bitcast(const From& src) {
   TORCH_CHECK(sizeof(To) == sizeof(From), "Invalid bitcast invocation");
   To storage;
   std::memcpy(&storage, &src, sizeof(To));
-  return storage;
+  return reinterpret_cast(storage);
 }
 
 class SimpleIREvaluatorImpl;
diff --git a/torch/csrc/jit/tensorexpr/kernel.cpp b/torch/csrc/jit/tensorexpr/kernel.cpp
index cc1566372038..a8ffa40f58db 100644
--- a/torch/csrc/jit/tensorexpr/kernel.cpp
+++ b/torch/csrc/jit/tensorexpr/kernel.cpp
@@ -1482,7 +1482,7 @@ std::vector TensorExprKernel::preAllocIntermediateBufs(
       remaining_interm_bufs.push_back(buf);
       continue;
     }
-    auto bp = malloc(size);
+    auto bp = (void*)malloc(size);
    if (!bp) {
      remaining_interm_bufs.push_back(buf);
      continue;
diff --git a/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp b/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
index 46a09314fb7b..dd5c51d63153 100644
--- a/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
+++ b/torch/csrc/jit/tensorexpr/loopnest_randomization.cpp
@@ -369,7 +369,7 @@ void loopnestRandomization(int64_t seed, LoopNest& l) {
         // Find a random number of loops to fuse
         int num_loops_to_fuse =
-            std::max(2, (std::rand() % (int)loops.size()));
+            std::max(2, (int)(std::rand() % (int)loops.size()));
 
         auto [loops_to_fuse, chosen_indices] =
             randomization_helper::select_n_randomly(
diff --git a/torch/csrc/jit/tensorexpr/operators/quantization.h b/torch/csrc/jit/tensorexpr/operators/quantization.h
index a33eb1081450..51bdbe730a6a 100644
--- a/torch/csrc/jit/tensorexpr/operators/quantization.h
+++ b/torch/csrc/jit/tensorexpr/operators/quantization.h
@@ -42,6 +42,13 @@ TORCH_API Tensor computeQuantizedConv2dPrepack(
     const std::optional& outputType,
     at::Device device);
 
+TORCH_API Tensor computeQuantizedConv1d(
+    const std::vector& inputs,
+    const std::vector& outputShape,
+    const std::vector& outputStrides,
+    const std::optional& outputType,
+    at::Device device);
+
 TORCH_API Tensor computeQuantizedConv2d(
     const std::vector& inputs,
     const std::vector& outputShape,
diff --git a/torch/csrc/profiler/collection.h b/torch/csrc/profiler/collection.h
index c0f25add5273..515d33053537 100644
--- a/torch/csrc/profiler/collection.h
+++ b/torch/csrc/profiler/collection.h
@@ -369,7 +369,7 @@ struct ExtraFields {
   uint64_t correlation_id_{0};
   libkineto::ActivityType activity_type_;
   Flow flow;
-  std::weak_ptr linked_activity_;
+  std::weak_ptr linked_activity_{};
   std::string metadata_json_;
 };
 
diff --git a/torch/csrc/profiler/unwind/fde.h b/torch/csrc/profiler/unwind/fde.h
index 083578ec391e..cb3de64486b8 100644
--- a/torch/csrc/profiler/unwind/fde.h
+++ b/torch/csrc/profiler/unwind/fde.h
@@ -57,7 +57,7 @@ struct FDE {
     throw UnwindError("unsupported 'eh' augmentation string");
   }
   code_alignment_factor_ = static_cast(LC.readULEB128());
-  data_alignment_factor_ = LC.readSLEB128();
+  data_alignment_factor_ = static_cast(LC.readSLEB128());
   if (version == 1) {
     ra_register_ = LC.read();
   } else {