[1/N] Fix clang-tidy readability checks (#164561)

Thoroughly check all `.cpp` files except `jit` files for clang-tidy readability issues.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164561
Approved by: https://github.com/Skylion007
Authored by Yuanyuan Chen on 2025-10-04 09:40:35 +00:00
Committed by PyTorch MergeBot
parent 9580539e2f
commit 5103ecc5d8
66 changed files with 153 additions and 202 deletions

View File

@ -179,7 +179,7 @@ void propagate_names_except(const Tensor& result, const Tensor& src, IntArrayRef
return;
}
const auto src_names = src.names();
const auto result_dim = static_cast<int64_t>(result.dim());
const auto result_dim = result.dim();
const auto src_dim = static_cast<int64_t>(src_names.size());
const auto excluded_dim = static_cast<int64_t>(excluded_idxs.size());
TORCH_INTERNAL_ASSERT(src_dim - excluded_dim == result_dim);
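
The hunk above drops a `static_cast<int64_t>` around `result.dim()`, whose return type is already `int64_t`; this is the pattern clang-tidy reports as `readability-redundant-casting` (the check name is my inference, the commit only says "readability checks"). A minimal sketch with a hypothetical `TensorLike` stand-in:

```cpp
#include <cstdint>

// Hypothetical stand-in for at::Tensor, only to keep the sketch self-contained;
// the real Tensor::dim() also returns int64_t.
struct TensorLike {
  int64_t dim() const { return 4; }
};

int64_t result_dim_example(const TensorLike& result) {
  // Before: the cast restates a type dim() already has.
  const auto old_style = static_cast<int64_t>(result.dim());
  // After: identical type and value, less noise.
  const auto new_style = result.dim();
  return old_style + new_style;
}
```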

View File

@ -273,11 +273,11 @@ void checkLayout(CheckedFrom c, at::ArrayRef<Tensor> tensors, at::Layout layout)
}
void * maybe_data_ptr(const Tensor& tensor) {
return tensor.defined() ? (void *)tensor.data_ptr() : nullptr;
return tensor.defined() ? tensor.data_ptr() : nullptr;
}
void * maybe_data_ptr(const TensorArg& tensor) {
return tensor->defined() ? (void *)tensor->data_ptr() : nullptr;
return tensor->defined() ? tensor->data_ptr() : nullptr;
}
void check_dim_size(
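
In `maybe_data_ptr` the C-style `(void *)` cast restates the type the expression already has, so it can simply be dropped, the same redundant-cast cleanup as above. A small sketch with a hypothetical `FakeTensor` stand-in:

```cpp
// Hypothetical stand-in for at::Tensor; the real data_ptr() also returns void*.
struct FakeTensor {
  void* data = nullptr;
  bool defined() const { return data != nullptr; }
  void* data_ptr() const { return data; }
};

void* maybe_data_ptr_before(const FakeTensor& tensor) {
  // Before: the C-style (void*) cast restates the return type of data_ptr().
  return tensor.defined() ? (void*)tensor.data_ptr() : nullptr;
}

void* maybe_data_ptr_after(const FakeTensor& tensor) {
  // After: no cast needed, the expression already has type void*.
  return tensor.defined() ? tensor.data_ptr() : nullptr;
}
```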

View File

@ -76,13 +76,7 @@ void _print_dispatch_trace(const std::string& label, const std::string& op_name,
OpRegistrationListener::~OpRegistrationListener()= default;
Dispatcher::Dispatcher()
: operators_()
, operatorLookupTable_()
, backendFallbackKernels_()
, listeners_(std::make_unique<detail::RegistrationListenerList>())
, cond_var_()
, guard_(std::make_shared<Guard>())
Dispatcher::Dispatcher(): backendFallbackKernels_(), listeners_(std::make_unique<detail::RegistrationListenerList>()), guard_(std::make_shared<Guard>())
{}
Dispatcher::~Dispatcher() {
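
The collapsed `Dispatcher` constructor (and the `OperatorEntry` one in the next hunk) drops initializers such as `operators_()` and `cond_var_()` that only spell out the default construction those members get anyway; this matches clang-tidy's `readability-redundant-member-init` (check name inferred). A minimal sketch with hypothetical member types:

```cpp
#include <condition_variable>
#include <memory>
#include <vector>

struct RegistrationListenerList {};  // hypothetical stand-in

class DispatcherLike {
 public:
  // Before (flagged): members listed only to default-construct them.
  //   DispatcherLike()
  //       : operators_(), cond_var_(),
  //         listeners_(std::make_unique<RegistrationListenerList>()) {}

  // After: keep only the initializer that does real work.
  DispatcherLike()
      : listeners_(std::make_unique<RegistrationListenerList>()) {}

 private:
  std::vector<int> operators_;                           // default-constructed anyway
  std::condition_variable cond_var_;                     // default-constructed anyway
  std::unique_ptr<RegistrationListenerList> listeners_;  // needs an explicit initializer
};
```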

View File

@ -62,17 +62,7 @@ static const auto& getDispatchTableIndexToKey() {
}
OperatorEntry::OperatorEntry(OperatorName&& operator_name)
: name_(std::move(operator_name))
, schema_()
#ifndef C10_MOBILE
, tags_()
#endif
, dispatchTable_()
, dispatchKeyExtractor_(DispatchKeyExtractor::makeUninitialized())
, kernels_()
, cpp_signature_()
, sym_cpp_signature_()
, is_observed_(ObservedOperators::isObserved(name_))
: name_(std::move(operator_name)), dispatchTable_(), dispatchKeyExtractor_(DispatchKeyExtractor::makeUninitialized()), is_observed_(ObservedOperators::isObserved(name_))
{
// Pick up any backend fallbacks that were registered prior to this
// OperatorEntry being created.

View File

@ -73,7 +73,7 @@ c10::FunctionSchema RegisterOperators::inferSchemaFromKernels_(
std::optional<FunctionSchema> inferred_schema = std::nullopt;
for (const auto& kernel : options.kernels) {
if (nullptr != kernel.inferred_function_schema.get()) {
if (nullptr != kernel.inferred_function_schema) {
if (!inferred_schema.has_value()) {
inferred_schema = *kernel.inferred_function_schema;
break;
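
Comparing a smart pointer with `nullptr` needs no `.get()`, since `std::unique_ptr`/`std::shared_ptr` provide those comparisons directly; later hunks apply the same idea to dereferences, turning `*(weight_.get())` into `*weight_`. This is the pattern `readability-redundant-smartptr-get` reports (check name inferred). A small sketch with a placeholder `FunctionSchema` type:

```cpp
#include <memory>

// Placeholder type, not the real c10::FunctionSchema.
struct FunctionSchema {};

bool has_inferred_schema_before(
    const std::unique_ptr<FunctionSchema>& inferred_function_schema) {
  // Before: .get() is redundant in a comparison with nullptr.
  return nullptr != inferred_function_schema.get();
}

bool has_inferred_schema_after(
    const std::unique_ptr<FunctionSchema>& inferred_function_schema) {
  // After: smart pointers compare against nullptr directly.
  return nullptr != inferred_function_schema;
}
```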

View File

@ -323,7 +323,7 @@ class CuBlasLtMatmulDescriptor : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
inline void setAttribute(cublasLtMatmulDescAttributes_t attr, const T value) {
void setAttribute(cublasLtMatmulDescAttributes_t attr, const T value) {
// NOLINTNEXTLINE(bugprone-sizeof-expression)
TORCH_CUDABLAS_CHECK(::cublasLtMatmulDescSetAttribute(descriptor(), attr, &value, sizeof(value)));
}
@ -345,7 +345,7 @@ class CuBlasLtMatrixLayout : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
inline void setAttribute(cublasLtMatrixLayoutAttribute_t attr, const T value) {
void setAttribute(cublasLtMatrixLayoutAttribute_t attr, const T value) {
TORCH_CUDABLAS_CHECK(::cublasLtMatrixLayoutSetAttribute(descriptor(), attr, &value, sizeof(T)));
}
};
@ -360,7 +360,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
inline void setAttribute(cublasLtMatmulPreferenceAttributes_t attr, const T value) {
void setAttribute(cublasLtMatmulPreferenceAttributes_t attr, const T value) {
TORCH_CUDABLAS_CHECK(::cublasLtMatmulPreferenceSetAttribute(descriptor(), attr, &value, sizeof(T)));
}
};

View File

@ -222,15 +222,15 @@ struct CUDACachingHostAllocatorImpl
size_t numThreads,
size_t pageSize) {
uintptr_t start = (uintptr_t)ptr + (size * i / numThreads);
uintptr_t end = (uintptr_t)start + (size / numThreads);
uintptr_t end = start + (size / numThreads);
if (i == (numThreads - 1)) {
end = (uintptr_t)ptr + size;
}
// pre-fault/map the pages by setting the first byte of the page
uintptr_t alignedStart =
(((uintptr_t)start + pageSize - 1) & ~(pageSize - 1));
for (uintptr_t p = alignedStart; p < ((uintptr_t)end); p += pageSize) {
((start + pageSize - 1) & ~(pageSize - 1));
for (uintptr_t p = alignedStart; p < (end); p += pageSize) {
// NOLINTNEXTLINE(performance-no-int-to-ptr)
memset((void*)p, 0, 1);
}

View File

@ -404,8 +404,6 @@ TuningContext::TuningContext() :
max_warmup_iterations_{0},
icache_flush_{true},
rotating_buffer_size_{-1},
filename_{},
untuned_file_{},
results_count_from_input_file_{0},
is_shutting_down_{false}
{

View File

@ -141,7 +141,7 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo
size[i] = (int) t.size(i);
}
for (const auto i : c10::irange(dim, pad)) {
size[i] = (int) 1;
size[i] = 1;
}
dim = std::max(dim, pad);
cudnnTensorFormat_t filter_format{};

View File

@ -176,7 +176,7 @@ struct LinalgCheckMatrixUnaryRuleHelper;
template <char const *op_name, typename F, F Func, typename A, typename... T>
struct LinalgCheckMatrixUnaryRuleHelper<op_name, F, Func, typelist<A, T...>> {
static inline Tensor check_and_reshape_input(const Tensor& tensor, std::optional<int64_t> batch_dim) {
static Tensor check_and_reshape_input(const Tensor& tensor, std::optional<int64_t> batch_dim) {
TORCH_CHECK(rankWithoutBatchDim(tensor, batch_dim) >= 2, op_name, ": The input tensor A must have at least 2 dimensions.");
return moveBatchDimToFront(tensor, batch_dim);
}
@ -222,7 +222,7 @@ struct LinalgCheckMatrixBinaryRuleHelper;
template <char const *op_name, typename F, F Func, typename A, typename B, typename... T>
struct LinalgCheckMatrixBinaryRuleHelper<op_name, F, Func, typelist<A, B, T...>> {
static inline std::tuple<Tensor, Tensor> check_inputs_and_reshape_inputs(
static std::tuple<Tensor, Tensor> check_inputs_and_reshape_inputs(
const Tensor& first, std::optional<int64_t> first_bdim,
const Tensor& second, std::optional<int64_t> second_bdim) {
TORCH_CHECK(rankWithoutBatchDim(first, first_bdim) >= 2,

View File

@ -58,7 +58,7 @@ scalar_t dot_impl(int64_t n, const scalar_t *x, int64_t incx, const scalar_t *y,
template<typename scalar_t>
scalar_t vdot_impl(int64_t n, const scalar_t *x, int64_t incx, const scalar_t *y, int64_t incy);
static constexpr inline bool lda_cond(int64_t m, int64_t n, int64_t lda) {
static constexpr bool lda_cond(int64_t m, int64_t n, int64_t lda) {
return n == 1 || lda >= std::max<int64_t>(1L, m);
}
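
Removing `inline` here (and on the `setAttribute` members earlier, the `IsUnique` and reduction functors, and `GlobalStateGuard` later) works because `constexpr` functions and member functions defined inside their class are already implicitly `inline`; clang-tidy reports this as `readability-redundant-inline-specifier` (check name inferred). A compact sketch:

```cpp
#include <algorithm>
#include <cstdint>

// Before (flagged): 'inline' is redundant, constexpr functions are implicitly inline.
//   static constexpr inline bool lda_cond(int64_t m, int64_t n, int64_t lda);

// After: same linkage and semantics without the extra keyword.
static constexpr bool lda_cond(int64_t m, int64_t n, int64_t lda) {
  return n == 1 || lda >= std::max<int64_t>(1, m);
}

static_assert(lda_cond(3, 1, 0), "n == 1 always satisfies the condition");
```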

View File

@ -991,7 +991,7 @@ std::size_t UnsafeUkernelKeyHasher<PackKey>::operator()(const PackKey& key) cons
template <typename key_t, typename value_t>
struct KernelCache {
using kstore_t = std::unordered_map<key_t, std::shared_ptr<value_t>, UnsafeUkernelKeyHasher<key_t>>;
static inline std::shared_ptr<value_t>&& fetch_or_create(
static std::shared_ptr<value_t>&& fetch_or_create(
const key_t& key,
const std::function<std::shared_ptr<value_t>()>& callback) {
auto&& search = get_store().find(key);
@ -1003,7 +1003,7 @@ struct KernelCache {
}
}
static inline kstore_t& get_store() {
static kstore_t& get_store() {
static thread_local kstore_t cache_kernels;
return cache_kernels;
}
@ -1067,7 +1067,7 @@ struct GemmHelper {
struct Brgemm : public KernelCache <BrgemmKey, GemmHelper> {
// Fetch/create GemmHelper object and execute brgemm with batch size = 1
template <typename scalar_t_a, typename scalar_t_b, typename scalar_t_c>
static inline void call(
static void call(
int64_t M,
int64_t N,
int64_t K,
@ -1118,12 +1118,12 @@ struct Brgemm : public KernelCache <BrgemmKey, GemmHelper> {
.execute(A, B, (*value).A_B_offsets, C, (*value).scratchpad.data());
}
static inline std::shared_ptr<GemmHelper>& get_current() {
static std::shared_ptr<GemmHelper>& get_current() {
static thread_local std::shared_ptr<GemmHelper> current;
return current;
}
static inline bool device_check(ScalarType dtype) {
static bool device_check(ScalarType dtype) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}
@ -1153,7 +1153,7 @@ using pack_t = dnnl::ukernel::brgemm_pack_B;
using pack_t = dnnl::ukernel::transform;
#endif
struct Pack : public KernelCache <PackKey, pack_t> {
static inline void call(
static void call(
int64_t K,
int64_t N,
int64_t ld_in,
@ -1182,7 +1182,7 @@ struct Pack : public KernelCache <PackKey, pack_t> {
}
}
static inline bool could_pack(ScalarType dtype) {
static bool could_pack(ScalarType dtype) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}

View File

@ -702,7 +702,7 @@ static void check_shape_forward(const at::Tensor& input,
// If kernel size is incorrect
std::ostringstream input_ss;
std::ostringstream kernel_ss;
std::string separator = "";
std::string separator;
for (int i = 0, len = input_shape.size(); i < len; ++i) {
input_ss << separator << input_shape[i];
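
`std::string separator = "";` yields the same empty string as plain default construction, so the literal is redundant; this is what `readability-redundant-string-init` reports (check name inferred). A runnable sketch of the separator idiom used here:

```cpp
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

std::string join(const std::vector<int>& shape) {
  std::ostringstream ss;
  std::string separator;  // was: std::string separator = "";  (redundant init)
  for (int v : shape) {
    ss << separator << v;
    separator = " x ";  // only switch to a real separator after the first item
  }
  return ss.str();
}

int main() {
  std::cout << join({2, 3, 4}) << '\n';  // prints: 2 x 3 x 4
}
```
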
@ -1019,7 +1019,7 @@ static Tensor convolution_same(
if (symmetric_padding) {
// All backends handle symmetric padding natively
SymDimVector output_padding(static_cast<size_t>(dim));
SymDimVector output_padding(dim);
return at::convolution_symint(input, weight, bias, stride, padding_l, dilation,
false, output_padding, groups);
}
@ -1039,7 +1039,7 @@ static Tensor convolution_same(
}
}
auto padded_input = at::constant_pad_nd_symint(input, pad_nd, 0);
SymDimVector output_padding(static_cast<size_t>(dim));
SymDimVector output_padding(dim);
return at::convolution_symint(padded_input, weight, bias, stride, padding_l,
dilation, false, output_padding, groups);
}

View File

@ -1,6 +1,5 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/native/Copy.h>
#include <ATen/native/Copy.h>
#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>

View File

@ -70,7 +70,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
new_shape.emplace_back(input_sizes[i]);
}
for (const auto i : c10::irange((size_t)l_pad)) {
for (const auto i : c10::irange(l_pad)) {
auto pad_idx = pad.size() - ((i + 1) * 2);
auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1];
TORCH_CHECK(new_dim >= 0, "The input size ", input_sizes[l_diff + i], ", plus negative padding ",

View File

@ -107,11 +107,6 @@ void resize_bytes_cpu(StorageImpl* storage, size_t size_bytes) {
storage->set_nbytes(size_bytes);
}
// Call the sparse implementation in SparseTensor.cpp directly.
// A dynamic dispatch here is NOT necessary, so I didn't put
// this function in native_functions.yaml
const Tensor& resize_as_sparse_(const Tensor& self, const Tensor& src);
// TODO(VitalyFedyunin): Move it to HTML docs.
//
// Strides of the output tensor of `resize_as_` operator is defined by input

View File

@ -145,12 +145,6 @@
#include <utility>
#include <vector>
namespace at::native {
AdvancedIndex make_info(Tensor self, IOptTensorListRef orig);
} // namespace at::native
namespace at::meta {
TORCH_META_FUNC(gather)

View File

@ -73,7 +73,6 @@
#include <ATen/ops/where_native.h>
#include <ATen/ops/zeros_like.h>
#include <iostream>
#include <utility>
#endif

View File

@ -124,7 +124,7 @@ struct IsUnique {};
template <typename scalar_t>
struct IsUnique<scalar_t, false> {
inline bool operator() (scalar_t* data_ptr, int64_t i) {
bool operator() (scalar_t* data_ptr, int64_t i) {
if (i == 0) { return true; }
return c10::load(&data_ptr[i]) != c10::load(&data_ptr[i - 1]);
}
@ -132,7 +132,7 @@ struct IsUnique<scalar_t, false> {
template <typename scalar_t>
struct IsUnique<scalar_t, true> {
inline bool operator() (scalar_t* data_ptr, int64_t i) {
bool operator() (scalar_t* data_ptr, int64_t i) {
if (i == 0) { return true; }
return (c10::load(&data_ptr[i]) != c10::load(&data_ptr[i - 1]))
&& !(_isnan(data_ptr[i]) && _isnan(data_ptr[i - 1]));

View File

@ -17,7 +17,7 @@
namespace ao::sparse {
int register_linear_params();
#ifdef USE_FBGEMM

View File

@ -20,7 +20,7 @@
namespace ao::sparse {
int register_linear_params();
#ifdef USE_FBGEMM
namespace {

View File

@ -16,7 +16,7 @@
#endif
namespace ao::sparse {
int register_linear_params();
#ifdef USE_FBGEMM

View File

@ -1919,7 +1919,7 @@ Tensor& _mm_dtype_out_cuda(const Tensor& self, const Tensor& mat2, const at::Sca
TORCH_CHECK(out_dtype == out.scalar_type(), "out_dtype must be the same as the dtype of the provided out tensor");
addmm_out_cuda_impl(const_cast<Tensor&>(out), out, self, mat2, 0, 1);
addmm_out_cuda_impl(out, out, self, mat2, 0, 1);
return out;
}

View File

@ -76,7 +76,6 @@ std::tuple<Tensor, Tensor> _cudnn_ctc_loss_tensor(
#else // AT_CUDNN_ENABLED
#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Types.h>
#include <ATen/cudnn/Utils.h>
@ -284,9 +283,9 @@ std::tuple<Tensor, Tensor> _cudnn_ctc_loss_tensor(
checkBackend(c, {*targets}, Backend::CUDA);
const auto batch_size = log_probs->size(1);
int64_t input_lengths_size =
input_lengths_.sizes().size() ? input_lengths_.size(0) : 1;
!input_lengths_.sizes().empty() ? input_lengths_.size(0) : 1;
int64_t target_lengths_size =
target_lengths_.sizes().size() ? target_lengths_.size(0) : 1;
!target_lengths_.sizes().empty() ? target_lengths_.size(0) : 1;
TORCH_CHECK(
input_lengths_size == batch_size,
"input_lengths needs to have size to match batch_size");

View File

@ -142,8 +142,6 @@ void run_cudnn_SDP_bprop_nestedtensor(
namespace at {
namespace native {
#include <cudnn_frontend.h>
namespace fe = cudnn_frontend;
constexpr uint8_t MAX_MHA_DIM = 4;

View File

@ -38,7 +38,6 @@ REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub)
#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/ConvUtils.h>
#include <c10/util/irange.h>
namespace at::native {
@ -105,7 +104,7 @@ static void check_shape_forward(const Tensor& input,
// If kernel size is incorrect
std::ostringstream input_ss;
std::ostringstream kernel_ss;
std::string separator = "";
std::string separator;
for (int i = 0, len = input_shape.size(); i < len; ++i) {
input_ss << separator << input_shape[i];

View File

@ -316,7 +316,7 @@ Tensor NestedTensor_to_padded_tensor_generic(
TORCH_CHECK(
(int64_t)output_size_.size() == ret_val.dim(),
"Length of output_size does not match NestedTensor dims. Broadcasting is not supported.");
for (int64_t i = 0; i < (int64_t)ret_val.dim(); i++) {
for (int64_t i = 0; i < ret_val.dim(); i++) {
TORCH_CHECK(
output_size_[i] >= ret_val.size(i),
"Value in output_size is less than NestedTensor padded size. Truncation is not supported.");

View File

@ -1198,7 +1198,7 @@ at::Tensor PackedConvWeightsOnednn<kSpatialDim>::apply_impl(
kSpatialDim == 2 ? ideep::format_tag::nhwc : ideep::format_tag::ndhwc);
ideep::tensor src(src_desc, act_contig.data_ptr());
// weights & bias
ideep::tensor& weights = *(weight_.get());
ideep::tensor& weights = *(weight_);
bool with_bias = bias_.has_value();
const auto& kernel_size = weights.get_dims();
// dst

View File

@ -812,7 +812,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_impl(
auto is_input_qint8 = input.scalar_type() == c10::ScalarType::QInt8;
auto input_contig = input.expect_contiguous();
auto& w = *(weight_.get());
auto& w = *weight_;
auto K = input.size(dim - 1), M = input.numel() / K, N = w.get_dim(1);
auto input_dims = {M, K};
auto input_data_type = is_input_qint8 ? dnnl::memory::data_type::s8 : dnnl::memory::data_type::u8;

View File

@ -545,7 +545,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_dynamic_impl(
/*reduce_range=*/reduce_range);
const std::vector<int32_t>& src_zero_point = std::vector<int32_t>(1, q_params.zero_point);
// weights, dst
auto w = *(weight_.get());
auto w = *weight_;
auto dst_dims = {x.get_dim(0), w.get_dim(1)};
const ideep::scale_t& src_scales = ideep::scale_t(1, 1.0/q_params.scale);
const ideep::scale_t& weights_scales = w.get_scale();

View File

@ -12,7 +12,6 @@
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
#include <c10/util/irange.h>
#include <torch/library.h>
#include <utility>

View File

@ -10,7 +10,6 @@
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
#include <c10/util/irange.h>
#include <torch/library.h>
int register_linear_params();

View File

@ -65,7 +65,7 @@ Tensor& addmv_out_sparse_compressed(
return result.zero_();
} else {
return at::mul_out(
const_cast<Tensor&>(result),
result,
self,
at::native::scalar_tensor(
beta,

View File

@ -1330,18 +1330,18 @@ Tensor reduce_sparse_csr_cpu_template(const Tensor& sparse, IntArrayRef dims_to_
template <typename scalar_t>
struct ReductionAddOp {
inline scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
return a + b;
}
inline scalar_t identity() const { return 0; }
scalar_t identity() const { return 0; }
};
template <typename scalar_t>
struct ReductionMulOp {
inline scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
return a * b;
}
inline scalar_t identity() const { return 1; }
scalar_t identity() const { return 1; }
};
} // namespace

View File

@ -55,7 +55,6 @@
#include <ATen/ops/is_pinned_native.h>
#include <ATen/ops/resize_as_sparse.h>
#include <ATen/ops/resize_as_sparse_native.h>
#include <ATen/ops/sparse_coo_tensor.h>
#include <ATen/ops/sparse_coo_tensor_native.h>
#include <ATen/ops/sparse_dim_native.h>
#include <ATen/ops/sparse_mask_native.h>

View File

@ -244,7 +244,7 @@ Tensor& addmv_out_sparse_compressed_cuda(
return result.zero_();
} else {
return at::mul_out(
const_cast<Tensor&>(result),
result,
self,
at::native::scalar_tensor(
beta,

View File

@ -203,7 +203,7 @@ class LocalCallbackManager {
// Runtime cache.
size_t global_version_{GlobalCallbackManager::NoVersion};
std::array<CacheEntry, NumRecordScopes> active_callbacks_;
std::mt19937 generator_{};
std::mt19937 generator_;
};
// ============================================================================
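
The `{}` after `generator_` only spells out the default construction a `std::mt19937` member gets anyway, so the commit removes it; the many `std::vector<...> foo{};` to `foo;` changes further down follow the same rule, again under `readability-redundant-member-init` (check name inferred). A minimal sketch with a hypothetical class name:

```cpp
#include <random>
#include <vector>

class LocalCallbackManagerLike {
  // Before (flagged): the empty brace initializer only default-constructs the member.
  //   std::mt19937 generator_{};
  //   std::vector<int> active_callbacks_{};

  // After: class-type members are default-constructed anyway, so the braces go.
  std::mt19937 generator_;
  std::vector<int> active_callbacks_;
};
```

Note this is only equivalent for class types with default constructors; initializers on built-in members such as `size_t size = 0;` are meaningful, and the hunks below keep those.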

View File

@ -816,7 +816,7 @@ struct ExpandableSegment {
struct BlockState {
c10::DeviceIndex device = 0;
cudaStream_t stream = nullptr;
stream_set stream_uses = {};
stream_set stream_uses;
size_t size = 0;
void* ptr = nullptr;
bool allocated = false;
@ -1683,7 +1683,7 @@ class DeviceCachingAllocator {
cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone};
};
inline CaptureInfo stream_get_capture_info(cudaStream_t stream) {
CaptureInfo stream_get_capture_info(cudaStream_t stream) {
CaptureInfo info{};
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 13000)
C10_CUDA_CHECK(cudaStreamGetCaptureInfo(
@ -1997,7 +1997,7 @@ class DeviceCachingAllocator {
ss.put(SHAREABLE_CUDA_EXPANDABLE_SEGMENT);
auto full_range = block->expandable_segment_->share(
SegmentRange(block->ptr, block->size), ss);
offset = (char*)block->ptr - (char*)full_range.ptr;
offset = (char*)block->ptr - full_range.ptr;
}
return ShareableHandle{offset, ss.str()};
}
@ -3384,7 +3384,7 @@ class DeviceCachingAllocator {
if (pool->owner_PrivatePool && pool->owner_PrivatePool->allocator()) {
// If there is an active mempool with a given allocator,
// we use the given allocator's delete function.
pool->owner_PrivatePool->allocator()->raw_delete((void*)block->ptr);
pool->owner_PrivatePool->allocator()->raw_delete(block->ptr);
} else {
C10_CUDA_CHECK(cudaFree((void*)block->ptr));
}
@ -3423,8 +3423,7 @@ class DeviceCachingAllocator {
}
block->pool->blocks.erase(block);
ptrdiff_t before_size =
static_cast<char*>(unmapped.ptr) - static_cast<char*>(block->ptr);
ptrdiff_t before_size = unmapped.ptr - static_cast<char*>(block->ptr);
if (before_size > 0) {
// prev? -> before_free -> block
Block* before_free = new Block(
@ -3442,7 +3441,7 @@ class DeviceCachingAllocator {
block->stream,
after_size,
block->pool,
static_cast<char*>(unmapped.ptr) + unmapped.size);
unmapped.ptr + unmapped.size);
after_free->expandable_segment_ = block->expandable_segment_;
after_free->splice(block, block->next);
block->pool->insert_into_blocks(after_free);
@ -3832,7 +3831,7 @@ class NativeCachingAllocator : public CUDAAllocator {
": did you call init?");
Block* block = device_allocator[device]->malloc(device, size, stream);
add_allocated_block(block);
*devPtr = (void*)block->ptr;
*devPtr = block->ptr;
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_memory_allocation(

View File

@ -446,7 +446,7 @@ struct CudaMallocAsyncAllocator : public CUDAAllocator {
return !devs_initialized_flags.empty();
}
static inline void assertValidDevice(c10::DeviceIndex device) {
static void assertValidDevice(c10::DeviceIndex device) {
TORCH_CHECK(
0 <= device && device < device_count, "Invalid device argument.");
}

View File

@ -18,7 +18,7 @@
static PyObject* THPUpperModuleOfDevice = nullptr;
PyObject* THPDevice_New(const at::Device& device) {
auto type = (PyTypeObject*)&THPDeviceType;
auto type = &THPDeviceType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();

View File

@ -15,7 +15,7 @@
PyObject* THPDtype_New(at::ScalarType scalar_type, const std::string& name) {
HANDLE_TH_ERRORS
AT_ASSERT(name.length() < DTYPE_NAME_LEN);
auto type = (PyTypeObject*)&THPDtypeType;
auto type = &THPDtypeType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();

View File

@ -69,7 +69,7 @@ static PyObject* THPEvent_pynew(
}
PyObject* THPEvent_new(c10::DeviceType device_type, c10::EventFlag flag) {
auto type = (PyTypeObject*)&THPEventType;
auto type = &THPEventType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
TORCH_CHECK(self, "Failed to allocate memory for Event");
auto self_ = reinterpret_cast<THPEvent*>(self.get());

View File

@ -11,7 +11,7 @@
#include <string>
PyObject* THPLayout_New(at::Layout layout, const std::string& name) {
auto type = (PyTypeObject*)&THPLayoutType;
auto type = &THPLayoutType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();

View File

@ -13,7 +13,7 @@
PyObject* THPMemoryFormat_New(
at::MemoryFormat memory_format,
const std::string& name) {
auto type = (PyTypeObject*)&THPMemoryFormatType;
auto type = &THPMemoryFormatType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();

View File

@ -492,7 +492,7 @@ static PyObject* THPModule_addDocStr(PyObject* _unused, PyObject* args) {
static PyObject* THPModule_inferSize(PyObject* _unused, PyObject* args) {
HANDLE_TH_ERRORS
Py_ssize_t num_args = args ? (Py_ssize_t)PyTuple_Size(args) : 0;
Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
TORCH_CHECK(num_args == 2, "expected exactly 2 arguments");
PyObject* arg1 = PyTuple_GET_ITEM(args, 0);
TORCH_CHECK(THPSize_Check(arg1), "expected a torch.Size as argument 1");

View File

@ -11,7 +11,7 @@
#include <string>
PyObject* THPQScheme_New(at::QScheme qscheme, const std::string& name) {
auto type = (PyTypeObject*)&THPQSchemeType;
auto type = &THPQSchemeType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();

View File

@ -482,7 +482,7 @@ static PyObject* THPStorage_setFromFile(PyObject* self, PyObject* args) {
return nullptr;
}
Py_INCREF(self);
return (PyObject*)self;
return self;
}
// file is backed by a fd

View File

@ -102,7 +102,7 @@ static PyObject* THPStream_pynew(
PyObject* THPStream_Wrap(const c10::Stream& stream) {
HANDLE_TH_ERRORS
auto type = (PyTypeObject*)THPStreamClass;
auto type = THPStreamClass;
THPObjectPtr ptr(type->tp_alloc(type, 0));
if (!ptr) {
throw python_error();

View File

@ -18,7 +18,7 @@
#include <sstream>
static PyObject* THPFInfo_New(const at::ScalarType& type) {
auto finfo = (PyTypeObject*)&THPFInfoType;
auto finfo = &THPFInfoType;
auto self = THPObjectPtr{finfo->tp_alloc(finfo, 0)};
if (!self)
throw python_error();
@ -28,7 +28,7 @@ static PyObject* THPFInfo_New(const at::ScalarType& type) {
}
static PyObject* THPIInfo_New(const at::ScalarType& type) {
auto iinfo = (PyTypeObject*)&THPIInfoType;
auto iinfo = &THPIInfoType;
auto self = THPObjectPtr{iinfo->tp_alloc(iinfo, 0)};
if (!self)
throw python_error();

View File

@ -103,9 +103,9 @@ variable_list CrossMapLRN2d::backward(
double cache_ratio_value = 2 * ctx->saved_data["alpha"].toDouble() *
ctx->saved_data["beta"].toDouble() /
static_cast<double>(ctx->saved_data["size"].toInt());
int64_t inversePrePad = static_cast<int64_t>(
ctx->saved_data["size"].toInt() -
(ctx->saved_data["size"].toInt() - 1) / 2);
int64_t inversePrePad =
(ctx->saved_data["size"].toInt() -
(ctx->saved_data["size"].toInt() - 1) / 2);
grad_input.resize_as_(input);
torch::pow_out(

View File

@ -2176,7 +2176,7 @@ Tensor _nested_split_with_sizes_backward(
const Tensor& nt_sizes,
const at::TensorOptions& options) {
// add 1 to account for batch dim
dim = at::maybe_wrap_dim(dim, static_cast<int64_t>(nt_sizes.size(1)) + 1);
dim = at::maybe_wrap_dim(dim, nt_sizes.size(1) + 1);
// it's possible some of the grads are not defined (represents tensors of all
// 0s). Since at::cat can't handle those, let's define them
std::vector<Tensor> grads_all_defined;
@ -2187,10 +2187,9 @@ Tensor _nested_split_with_sizes_backward(
const auto& length = split_sizes[i].guard_int(__FILE__, __LINE__);
auto nt_split_size = nt_sizes.clone();
auto nt_split_size_ptr = nt_split_size.data_ptr<int64_t>();
for (int64_t j : c10::irange(static_cast<int64_t>(nt_sizes.size(0)))) {
for (int64_t j : c10::irange(nt_sizes.size(0))) {
// subtract 1 to account for batch dim
nt_split_size_ptr
[j * static_cast<int64_t>(nt_sizes.size(1)) + (dim - 1)] = length;
nt_split_size_ptr[j * nt_sizes.size(1) + (dim - 1)] = length;
}
Tensor zeros_buffer = at::zeros(
{at::native::get_numel_from_nested_size_tensor(nt_split_size)},

View File

@ -47,7 +47,7 @@ void gds_load_storage(
const size_t nbytes = storage.nbytes();
// Read the binary file
ssize_t ret = cuFileRead(cf_handle, (void*)dataPtr, nbytes, offset, 0);
ssize_t ret = cuFileRead(cf_handle, dataPtr, nbytes, offset, 0);
TORCH_CHECK(ret >= 0, "cuFileRead failed: ", cuGDSFileGetErrorString(ret));
}

View File

@ -303,7 +303,7 @@ at::Scalar as_scalar(PyObject* arg) {
}
if (THPUtils_checkLong(arg)) {
return at::Scalar(static_cast<int64_t>(THPUtils_unpackLong(arg)));
return at::Scalar(THPUtils_unpackLong(arg));
}
if (PyBool_Check(arg)) {
@ -735,8 +735,7 @@ PyObject* THCPModule_memorySnapshot(PyObject* _unused, PyObject* arg) {
"mempool_id elements must be integers");
mempool_id = c10::cuda::MempoolId_t(
static_cast<int64_t>(THPUtils_unpackLong(id1)),
static_cast<int64_t>(THPUtils_unpackLong(id2)));
THPUtils_unpackLong(id1), THPUtils_unpackLong(id2));
}
using c10::cuda::CUDACachingAllocator::BlockInfo;

View File

@ -52,13 +52,9 @@ DistAutogradContainer& DistAutogradContainer::init(int64_t worker_id) {
}
container.worker_id_ = static_cast<int16_t>(worker_id);
container.next_context_id_ = static_cast<int64_t>(worker_id)
<< kAutoIncrementBits;
container.next_autograd_message_id_ = static_cast<int64_t>(worker_id)
<< kAutoIncrementBits;
container.max_id_ =
(kAutoIncrementMask |
(static_cast<int64_t>(worker_id) << kAutoIncrementBits));
container.next_context_id_ = worker_id << kAutoIncrementBits;
container.next_autograd_message_id_ = worker_id << kAutoIncrementBits;
container.max_id_ = (kAutoIncrementMask | (worker_id << kAutoIncrementBits));
container.initialized_ = true;
return container;
}

View File

@ -828,7 +828,7 @@ class AsyncBroadcastWork : public ProcessGroupGloo::AsyncWork {
rootTensor(rootTensor),
tag(tag) {}
std::vector<at::Tensor> inputs{};
std::vector<at::Tensor> inputs;
const int rootRank;
const int rootTensor;
const uint32_t tag;
@ -924,8 +924,8 @@ class AsyncBroadcastCUDAWork : public AsyncBroadcastWork {
}
at::Tensor tmp;
std::vector<c10::Stream> streams{};
std::vector<c10::Event> events{};
std::vector<c10::Stream> streams;
std::vector<c10::Event> events;
};
} // namespace
@ -1160,7 +1160,7 @@ class AsyncReduceWork : public ProcessGroupGloo::AsyncWork {
reduceOp(std::move(reduceOp)),
tag(tag) {}
std::vector<at::Tensor> inputs{};
std::vector<at::Tensor> inputs;
const int rootRank;
const int rootTensor;
const ReduceOp reduceOp;
@ -1276,9 +1276,9 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
}
}
std::vector<at::Tensor> tmp{};
std::vector<c10::Stream> streams{};
std::vector<c10::Event> events{};
std::vector<at::Tensor> tmp;
std::vector<c10::Stream> streams;
std::vector<c10::Event> events;
};
} // namespace
@ -1362,8 +1362,8 @@ class AsyncAllgatherWork : public ProcessGroupGloo::AsyncWork {
inputs(inputs),
tag(tag) {}
std::vector<std::vector<at::Tensor>> outputs{};
std::vector<at::Tensor> inputs{};
std::vector<std::vector<at::Tensor>> outputs;
std::vector<at::Tensor> inputs;
const uint32_t tag;
void allgather(
@ -1472,13 +1472,13 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
}
}
std::vector<at::Tensor> tmpInputs{};
std::vector<c10::Stream> inputStreams{};
std::vector<c10::Event> inputEvents{};
std::vector<at::Tensor> tmpInputs;
std::vector<c10::Stream> inputStreams;
std::vector<c10::Event> inputEvents;
std::vector<std::vector<at::Tensor>> tmpOutputs{};
std::vector<c10::Stream> outputStreams{};
std::vector<c10::Event> outputEvents{};
std::vector<std::vector<at::Tensor>> tmpOutputs;
std::vector<c10::Stream> outputStreams;
std::vector<c10::Event> outputEvents;
};
// A work that takes an lambda on construction and calls it on wait.
@ -1647,8 +1647,8 @@ class AsyncAllgatherCoalescedWork : public ProcessGroupGloo::AsyncWork {
input_list(input_list),
tag(tag) {}
std::vector<std::vector<at::Tensor>> output_lists{};
std::vector<at::Tensor> input_list{};
std::vector<std::vector<at::Tensor>> output_lists;
std::vector<at::Tensor> input_list;
const uint32_t tag;
void allgather_coalesced() {
@ -1801,8 +1801,8 @@ class AsyncGatherWork : public ProcessGroupGloo::AsyncWork {
root(root),
tag(tag) {}
std::vector<std::vector<at::Tensor>> outputs{};
std::vector<at::Tensor> inputs{};
std::vector<std::vector<at::Tensor>> outputs;
std::vector<at::Tensor> inputs;
const int root;
const uint32_t tag;
@ -1920,13 +1920,13 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
}
}
std::vector<at::Tensor> tmpInputs{};
std::vector<c10::Stream> inputStreams{};
std::vector<c10::Event> inputEvents{};
std::vector<at::Tensor> tmpInputs;
std::vector<c10::Stream> inputStreams;
std::vector<c10::Event> inputEvents;
std::vector<std::vector<at::Tensor>> tmpOutputs{};
std::vector<c10::Stream> outputStreams{};
std::vector<c10::Event> outputEvents{};
std::vector<std::vector<at::Tensor>> tmpOutputs;
std::vector<c10::Stream> outputStreams;
std::vector<c10::Event> outputEvents;
};
} // namespace
@ -2033,8 +2033,8 @@ class AsyncScatterWork : public ProcessGroupGloo::AsyncWork {
root(root),
tag(tag) {}
std::vector<at::Tensor> outputs{};
std::vector<std::vector<at::Tensor>> inputs{};
std::vector<at::Tensor> outputs;
std::vector<std::vector<at::Tensor>> inputs;
const int root;
const uint32_t tag;
@ -2134,13 +2134,13 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
}
}
std::vector<at::Tensor> tmpOutputs{};
std::vector<c10::Stream> outputStreams{};
std::vector<c10::Event> outputEvents{};
std::vector<at::Tensor> tmpOutputs;
std::vector<c10::Stream> outputStreams;
std::vector<c10::Event> outputEvents;
std::vector<std::vector<at::Tensor>> tmpInputs{};
std::vector<c10::Stream> inputStreams{};
std::vector<c10::Event> inputEvents{};
std::vector<std::vector<at::Tensor>> tmpInputs;
std::vector<c10::Stream> inputStreams;
std::vector<c10::Event> inputEvents;
};
} // namespace
@ -2294,8 +2294,8 @@ class AsyncAlltoallWork : public ProcessGroupGloo::AsyncWork {
at::Tensor outputTensor;
at::Tensor inputTensor;
std::vector<int64_t> outputCounts{};
std::vector<int64_t> inputCounts{};
std::vector<int64_t> outputCounts;
std::vector<int64_t> inputCounts;
const uint32_t tag;
void alltoall(at::Tensor& outputTensor, at::Tensor& inputTensor) {
@ -2397,12 +2397,12 @@ class AsyncAlltoallCUDAWork : public AsyncAlltoallWork {
}
at::Tensor cpuOutput;
std::vector<c10::Stream> outputStreams{};
std::vector<c10::Event> outputEvents{};
std::vector<c10::Stream> outputStreams;
std::vector<c10::Event> outputEvents;
at::Tensor cpuInput;
std::vector<c10::Stream> inputStreams{};
std::vector<c10::Event> inputEvents{};
std::vector<c10::Stream> inputStreams;
std::vector<c10::Event> inputEvents;
};
} // namespace
@ -2576,9 +2576,9 @@ class AsyncBarrierWork : public ProcessGroupGloo::AsyncWork {
priorWork(std::move(priorWork)),
tag(tag) {}
std::vector<c10::weak_intrusive_ptr<AsyncWork>> priorWork{};
std::vector<c10::weak_intrusive_ptr<AsyncWork>> priorWork;
const uint32_t tag;
std::vector<at::Tensor> inputs{};
std::vector<at::Tensor> inputs;
const std::vector<at::Tensor> getInputTensors() override {
return inputs;

View File

@ -126,8 +126,8 @@ class AsyncAllreduceCUDAHostWork : public AsyncAllreduceWork {
}
std::vector<at::Tensor> tmp;
std::vector<c10::Stream> streams{};
std::vector<c10::Event> events{};
std::vector<c10::Stream> streams;
std::vector<c10::Event> events;
};
class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
@ -180,9 +180,9 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
}
}
std::vector<at::Tensor> tmp{};
std::vector<c10::Stream> streams{};
std::vector<c10::Event> events{};
std::vector<at::Tensor> tmp;
std::vector<c10::Stream> streams;
std::vector<c10::Event> events;
};
static c10::intrusive_ptr<ProcessGroupGloo::AsyncWork> makeAllreduceCUDAWork(

View File

@ -96,7 +96,7 @@ class TCPStoreMasterDaemon : public BackgroundThread {
std::unordered_set<int> miscellaneousSockets_;
Socket storeListenSocket_;
std::vector<Socket> sockets_{};
std::vector<Socket> sockets_;
#ifdef _WIN32
const std::chrono::milliseconds checkTimeout_ = std::chrono::milliseconds{10};
HANDLE ghStopEvent_{};

View File

@ -49,8 +49,8 @@ class HandlerRegistry {
}
private:
std::shared_mutex handlersMutex_{};
std::unordered_map<std::string, HandlerFunc> handlers_{};
std::shared_mutex handlersMutex_;
std::unordered_map<std::string, HandlerFunc> handlers_;
};
HandlerRegistry& getHandlerRegistry() {

View File

@ -375,8 +375,7 @@ void Reducer::mark_variable_ready_dense(size_t variable_index) {
// previous iterations, no copy is needed.
if (!grad.is_alias_of(bucket_view)) {
if (comm_hook_ == nullptr) {
auto wrapped =
at::native::wrapped_scalar_tensor(double(1.) / div_factor_);
auto wrapped = at::native::wrapped_scalar_tensor(1. / div_factor_);
if (!grad.requires_grad()) {
// Divides while copying into the bucket view to save one scan over
// all the input parameters.

View File

@ -532,8 +532,8 @@ class SocketListenOp {
std::string port_;
const SocketOptions* opts_;
std::vector<std::string> errors_{};
std::unique_ptr<SocketImpl> socket_{};
std::vector<std::string> errors_;
std::unique_ptr<SocketImpl> socket_;
};
SocketListenOp::SocketListenOp(std::uint16_t port, const SocketOptions& opts)
@ -772,9 +772,9 @@ class SocketConnectOp {
const char* host_;
std::string port_;
const SocketOptions* opts_;
TimePoint deadline_{};
std::vector<std::string> errors_{};
std::unique_ptr<SocketImpl> socket_{};
TimePoint deadline_;
std::vector<std::string> errors_;
std::unique_ptr<SocketImpl> socket_;
};
SocketConnectOp::SocketConnectOp(

View File

@ -200,7 +200,7 @@ int IpcChannel::broadcast_fds(
int world_size = (int)pids.size();
if (rank == src_rank) {
for (int dst_rank = 0; dst_rank < (int)world_size; ++dst_rank) {
for (int dst_rank = 0; dst_rank < world_size; ++dst_rank) {
if (dst_rank == rank) {
continue;
}
@ -242,7 +242,7 @@ void map_block(
CUmemAccessDesc desc;
desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
// NOLINTNEXTLINE(bugprone-signed-char-misuse)
desc.location.id = static_cast<int>(device_idx);
desc.location.id = device_idx;
desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
C10_CUDA_DRIVER_CHECK(driver_api->cuMemSetAccess_(*dev_ptr, size, &desc, 1));
#elif defined(USE_ROCM)

View File

@ -622,7 +622,7 @@ struct AutocastState {
struct GlobalStateGuard {
PyObject_HEAD
inline void init() {
void init() {
auto& ctx = at::globalContext();
_grad_mode = at::GradMode::is_enabled();
_autocast_state = AutocastState();
@ -643,7 +643,7 @@ struct GlobalStateGuard {
_default_dtype = at::get_default_dtype();
}
inline bool check() const {
bool check() const {
auto& ctx = at::globalContext();
return (_grad_mode == at::GradMode::is_enabled() &&
_autocast_state == AutocastState() &&
@ -663,7 +663,7 @@ struct GlobalStateGuard {
_default_dtype == at::get_default_dtype();
}
inline std::string reason() const {
std::string reason() const {
std::ostringstream os;
auto& ctx = at::globalContext();
if (_grad_mode != at::GradMode::is_enabled())

View File

@ -418,7 +418,7 @@ void OSSProxyExecutor::get_input_info_from_serialized(
const auto& arg = named_argument["arg"];
const auto& name = named_argument["name"].get<std::string>();
std::string custom_obj_name = "";
std::string custom_obj_name;
if (arg.contains("as_custom_obj")) {
custom_obj_name = arg["as_custom_obj"]["name"].get<std::string>();
}

View File

@ -1374,11 +1374,8 @@ void aoti_torch_warn(
const char* file,
uint32_t line,
const char* msg) {
::c10::warn(::c10::Warning(
::c10::UserWarning(),
{func, file, static_cast<uint32_t>(line)},
msg,
false));
::c10::warn(
::c10::Warning(::c10::UserWarning(), {func, file, line}, msg, false));
}
AOTITorchError aoti_torch__alloc_from_pool(

View File

@ -37,8 +37,8 @@ class EventHandlers {
}
private:
std::mutex mu_{};
std::vector<std::shared_ptr<EventHandler>> handlers_{};
std::mutex mu_;
std::vector<std::shared_ptr<EventHandler>> handlers_;
};
} // namespace

View File

@ -109,15 +109,15 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT
using ID = size_t;
// Mapping of each thread to its own operator stack
std::map<size_t, std::stack<ID>> opStack{};
std::map<size_t, std::stack<ID>> opStack;
// Uses the underlying TensorImpl object pointer as the key and map to its
// unique id.
std::map<const void*, ID> objectId{};
std::map<const void*, ID> objectId;
using weak_storage_ptr = c10::weak_intrusive_ptr<StorageImpl>;
std::unordered_map<const void*, ID> data_ptr_to_storage_id{};
std::unordered_map<const void*, ID> data_ptr_to_storage_id;
std::unordered_map<const void*, weak_storage_ptr>
data_ptr_to_weak_storage_ptr{};
data_ptr_to_weak_storage_ptr;
ID get_tensor_storage_ID(const c10::Storage& t_storage) {
const std::lock_guard<std::recursive_mutex> lock(gMutex);
@ -152,21 +152,21 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT
enum class RunState { uninitialized, disabled, enabled };
// Mutex for multithreaded access to the shared containers.
std::recursive_mutex gMutex{};
std::recursive_mutex gMutex;
// Stream to write output JSON.
std::ofstream out{};
std::ofstream out;
// Full path to the output file.
std::string fileName{};
std::string fileName;
std::string resourceDir{};
std::string resourceDir;
// RecordFunction callback handle for this observer.
CallbackHandle cbHandle{INVALID_CALLBACK_HANDLE};
// Process ID.
int32_t pid{-1};
std::string recordTime{};
std::string recordTime;
ExecutionTraceObserver() = default;
@ -193,7 +193,7 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT
bool record_integral_tensor_range{false};
std::unordered_set<std::string> nodeListForSavingIntegerTensor{};
std::unordered_set<std::string> nodeListForSavingIntegerTensor;
private:
static bool callbackShouldBeEnabled(RunState run_state) {