Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
[1/N] Fix clang-tidy readability checks (#164561)
Thoroughly check all `.cpp` files (except the `jit` ones) for readability issues.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164561
Approved by: https://github.com/Skylion007
Committed by: PyTorch MergeBot
Parent: 9580539e2f
Commit: 5103ecc5d8
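The hunks below all reduce to a handful of recurring patterns flagged by clang-tidy readability checks: redundant `static_cast`s and C-style casts, redundant `inline` on member functions defined in-class, redundant member and `= ""` / `{}` initializers, `.get()` calls on smart pointers that are then dereferenced, and `size()` used in boolean context instead of `empty()`. As a rough, self-contained illustration only (a hypothetical `BufferCache` class, not code from this commit), the sketch below shows the "after" shape of each pattern, with the "before" shape noted in comments:

```cpp
// Illustrative sketch of the readability cleanups applied throughout this diff.
// The class and members here are invented for the example.
#include <cstdint>
#include <memory>
#include <string>
#include <vector>

struct BufferCache {
  // readability-redundant-inline-specifier: functions defined inside the class
  // are implicitly inline.  Before: inline bool ready() const { ... }
  bool ready() const {
    // readability-container-size-empty.  Before: buffers_.size() != 0
    return !buffers_.empty();
  }

  // readability-redundant-casting: dim() already returns int64_t.
  // Before: return static_cast<int64_t>(dim());
  int64_t dim() const { return static_cast<int64_t>(buffers_.size()); }
  int64_t wrapped_dim() const { return dim(); }

  // readability-redundant-smartptr-get.  Before: return *(label_.get());
  const std::string& label() const { return *label_; }

  // readability-redundant-string-init / readability-redundant-member-init:
  // Before: std::string name_ = "";  std::vector<int> buffers_{};
  std::string name_;
  std::vector<int> buffers_;
  std::unique_ptr<std::string> label_ = std::make_unique<std::string>("cache");
};

int main() {
  BufferCache cache;
  cache.buffers_.push_back(1);
  return (cache.ready() && cache.dim() == 1 && cache.label() == "cache") ? 0 : 1;
}
```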
@@ -179,7 +179,7 @@ void propagate_names_except(const Tensor& result, const Tensor& src, IntArrayRef
return;
}
const auto src_names = src.names();
- const auto result_dim = static_cast<int64_t>(result.dim());
+ const auto result_dim = result.dim();
const auto src_dim = static_cast<int64_t>(src_names.size());
const auto excluded_dim = static_cast<int64_t>(excluded_idxs.size());
TORCH_INTERNAL_ASSERT(src_dim - excluded_dim == result_dim);
@@ -273,11 +273,11 @@ void checkLayout(CheckedFrom c, at::ArrayRef<Tensor> tensors, at::Layout layout)
}

void * maybe_data_ptr(const Tensor& tensor) {
- return tensor.defined() ? (void *)tensor.data_ptr() : nullptr;
+ return tensor.defined() ? tensor.data_ptr() : nullptr;
}

void * maybe_data_ptr(const TensorArg& tensor) {
- return tensor->defined() ? (void *)tensor->data_ptr() : nullptr;
+ return tensor->defined() ? tensor->data_ptr() : nullptr;
}

void check_dim_size(
@@ -76,13 +76,7 @@ void _print_dispatch_trace(const std::string& label, const std::string& op_name,

OpRegistrationListener::~OpRegistrationListener()= default;

- Dispatcher::Dispatcher()
- : operators_()
- , operatorLookupTable_()
- , backendFallbackKernels_()
- , listeners_(std::make_unique<detail::RegistrationListenerList>())
- , cond_var_()
- , guard_(std::make_shared<Guard>())
+ Dispatcher::Dispatcher(): backendFallbackKernels_(), listeners_(std::make_unique<detail::RegistrationListenerList>()), guard_(std::make_shared<Guard>())
{}

Dispatcher::~Dispatcher() {
@@ -62,17 +62,7 @@ static const auto& getDispatchTableIndexToKey() {
}

OperatorEntry::OperatorEntry(OperatorName&& operator_name)
- : name_(std::move(operator_name))
- , schema_()
- #ifndef C10_MOBILE
- , tags_()
- #endif
- , dispatchTable_()
- , dispatchKeyExtractor_(DispatchKeyExtractor::makeUninitialized())
- , kernels_()
- , cpp_signature_()
- , sym_cpp_signature_()
- , is_observed_(ObservedOperators::isObserved(name_))
+ : name_(std::move(operator_name)), dispatchTable_(), dispatchKeyExtractor_(DispatchKeyExtractor::makeUninitialized()), is_observed_(ObservedOperators::isObserved(name_))
{
// Pick up any backend fallbacks that were registered prior to this
// OperatorEntry being created.
@@ -73,7 +73,7 @@ c10::FunctionSchema RegisterOperators::inferSchemaFromKernels_(

std::optional<FunctionSchema> inferred_schema = std::nullopt;
for (const auto& kernel : options.kernels) {
- if (nullptr != kernel.inferred_function_schema.get()) {
+ if (nullptr != kernel.inferred_function_schema) {
if (!inferred_schema.has_value()) {
inferred_schema = *kernel.inferred_function_schema;
break;
@@ -323,7 +323,7 @@ class CuBlasLtMatmulDescriptor : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
- inline void setAttribute(cublasLtMatmulDescAttributes_t attr, const T value) {
+ void setAttribute(cublasLtMatmulDescAttributes_t attr, const T value) {
// NOLINTNEXTLINE(bugprone-sizeof-expression)
TORCH_CUDABLAS_CHECK(::cublasLtMatmulDescSetAttribute(descriptor(), attr, &value, sizeof(value)));
}
@@ -345,7 +345,7 @@ class CuBlasLtMatrixLayout : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
- inline void setAttribute(cublasLtMatrixLayoutAttribute_t attr, const T value) {
+ void setAttribute(cublasLtMatrixLayoutAttribute_t attr, const T value) {
TORCH_CUDABLAS_CHECK(::cublasLtMatrixLayoutSetAttribute(descriptor(), attr, &value, sizeof(T)));
}
};
@@ -360,7 +360,7 @@ class CuBlasLtMatmulPreference : public CuBlasLtDescriptor<
descriptor_.reset(raw_descriptor);
}
template <typename T>
- inline void setAttribute(cublasLtMatmulPreferenceAttributes_t attr, const T value) {
+ void setAttribute(cublasLtMatmulPreferenceAttributes_t attr, const T value) {
TORCH_CUDABLAS_CHECK(::cublasLtMatmulPreferenceSetAttribute(descriptor(), attr, &value, sizeof(T)));
}
};
@@ -222,15 +222,15 @@ struct CUDACachingHostAllocatorImpl
size_t numThreads,
size_t pageSize) {
uintptr_t start = (uintptr_t)ptr + (size * i / numThreads);
- uintptr_t end = (uintptr_t)start + (size / numThreads);
+ uintptr_t end = start + (size / numThreads);
if (i == (numThreads - 1)) {
end = (uintptr_t)ptr + size;
}

// pre-fault/map the pages by setting the first byte of the page
uintptr_t alignedStart =
- (((uintptr_t)start + pageSize - 1) & ~(pageSize - 1));
- for (uintptr_t p = alignedStart; p < ((uintptr_t)end); p += pageSize) {
+ ((start + pageSize - 1) & ~(pageSize - 1));
+ for (uintptr_t p = alignedStart; p < (end); p += pageSize) {
// NOLINTNEXTLINE(performance-no-int-to-ptr)
memset((void*)p, 0, 1);
}
@@ -404,8 +404,6 @@ TuningContext::TuningContext() :
max_warmup_iterations_{0},
icache_flush_{true},
rotating_buffer_size_{-1},
- filename_{},
- untuned_file_{},
results_count_from_input_file_{0},
is_shutting_down_{false}
{
@@ -141,7 +141,7 @@ void FilterDescriptor::set(const at::Tensor &t, const at::MemoryFormat memory_fo
size[i] = (int) t.size(i);
}
for (const auto i : c10::irange(dim, pad)) {
- size[i] = (int) 1;
+ size[i] = 1;
}
dim = std::max(dim, pad);
cudnnTensorFormat_t filter_format{};
@@ -176,7 +176,7 @@ struct LinalgCheckMatrixUnaryRuleHelper;

template <char const *op_name, typename F, F Func, typename A, typename... T>
struct LinalgCheckMatrixUnaryRuleHelper<op_name, F, Func, typelist<A, T...>> {
- static inline Tensor check_and_reshape_input(const Tensor& tensor, std::optional<int64_t> batch_dim) {
+ static Tensor check_and_reshape_input(const Tensor& tensor, std::optional<int64_t> batch_dim) {
TORCH_CHECK(rankWithoutBatchDim(tensor, batch_dim) >= 2, op_name, ": The input tensor A must have at least 2 dimensions.");
return moveBatchDimToFront(tensor, batch_dim);
}
@@ -222,7 +222,7 @@ struct LinalgCheckMatrixBinaryRuleHelper;

template <char const *op_name, typename F, F Func, typename A, typename B, typename... T>
struct LinalgCheckMatrixBinaryRuleHelper<op_name, F, Func, typelist<A, B, T...>> {
- static inline std::tuple<Tensor, Tensor> check_inputs_and_reshape_inputs(
+ static std::tuple<Tensor, Tensor> check_inputs_and_reshape_inputs(
const Tensor& first, std::optional<int64_t> first_bdim,
const Tensor& second, std::optional<int64_t> second_bdim) {
TORCH_CHECK(rankWithoutBatchDim(first, first_bdim) >= 2,
@@ -58,7 +58,7 @@ scalar_t dot_impl(int64_t n, const scalar_t *x, int64_t incx, const scalar_t *y,
template<typename scalar_t>
scalar_t vdot_impl(int64_t n, const scalar_t *x, int64_t incx, const scalar_t *y, int64_t incy);

- static constexpr inline bool lda_cond(int64_t m, int64_t n, int64_t lda) {
+ static constexpr bool lda_cond(int64_t m, int64_t n, int64_t lda) {
return n == 1 || lda >= std::max<int64_t>(1L, m);
}
@@ -991,7 +991,7 @@ std::size_t UnsafeUkernelKeyHasher<PackKey>::operator()(const PackKey& key) cons
template <typename key_t, typename value_t>
struct KernelCache {
using kstore_t = std::unordered_map<key_t, std::shared_ptr<value_t>, UnsafeUkernelKeyHasher<key_t>>;
- static inline std::shared_ptr<value_t>&& fetch_or_create(
+ static std::shared_ptr<value_t>&& fetch_or_create(
const key_t& key,
const std::function<std::shared_ptr<value_t>()>& callback) {
auto&& search = get_store().find(key);
@@ -1003,7 +1003,7 @@ struct KernelCache {
}
}

- static inline kstore_t& get_store() {
+ static kstore_t& get_store() {
static thread_local kstore_t cache_kernels;
return cache_kernels;
}
@@ -1067,7 +1067,7 @@ struct GemmHelper {
struct Brgemm : public KernelCache <BrgemmKey, GemmHelper> {
// Fetch/create GemmHelper object and execute brgemm with batch size = 1
template <typename scalar_t_a, typename scalar_t_b, typename scalar_t_c>
- static inline void call(
+ static void call(
int64_t M,
int64_t N,
int64_t K,
@@ -1118,12 +1118,12 @@ struct Brgemm : public KernelCache <BrgemmKey, GemmHelper> {
.execute(A, B, (*value).A_B_offsets, C, (*value).scratchpad.data());
}

- static inline std::shared_ptr<GemmHelper>& get_current() {
+ static std::shared_ptr<GemmHelper>& get_current() {
static thread_local std::shared_ptr<GemmHelper> current;
return current;
}

- static inline bool device_check(ScalarType dtype) {
+ static bool device_check(ScalarType dtype) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}
@@ -1153,7 +1153,7 @@ using pack_t = dnnl::ukernel::brgemm_pack_B;
using pack_t = dnnl::ukernel::transform;
#endif
struct Pack : public KernelCache <PackKey, pack_t> {
- static inline void call(
+ static void call(
int64_t K,
int64_t N,
int64_t ld_in,
@@ -1182,7 +1182,7 @@ struct Pack : public KernelCache <PackKey, pack_t> {
}
}

- static inline bool could_pack(ScalarType dtype) {
+ static bool could_pack(ScalarType dtype) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}
@@ -702,7 +702,7 @@ static void check_shape_forward(const at::Tensor& input,
// If kernel size is incorrect
std::ostringstream input_ss;
std::ostringstream kernel_ss;
- std::string separator = "";
+ std::string separator;

for (int i = 0, len = input_shape.size(); i < len; ++i) {
input_ss << separator << input_shape[i];
@@ -1019,7 +1019,7 @@ static Tensor convolution_same(

if (symmetric_padding) {
// All backends handle symmetric padding natively
- SymDimVector output_padding(static_cast<size_t>(dim));
+ SymDimVector output_padding(dim);
return at::convolution_symint(input, weight, bias, stride, padding_l, dilation,
false, output_padding, groups);
}
@@ -1039,7 +1039,7 @@ static Tensor convolution_same(
}
}
auto padded_input = at::constant_pad_nd_symint(input, pad_nd, 0);
- SymDimVector output_padding(static_cast<size_t>(dim));
+ SymDimVector output_padding(dim);
return at::convolution_symint(padded_input, weight, bias, stride, padding_l,
dilation, false, output_padding, groups);
}
@@ -1,6 +1,5 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/native/Copy.h>
- #include <ATen/native/Copy.h>

#include <ATen/core/Tensor.h>
#include <ATen/Dispatch.h>
@@ -70,7 +70,7 @@ Tensor constant_pad_nd(const Tensor& self, IntArrayRef pad, const Scalar& value)
new_shape.emplace_back(input_sizes[i]);
}

- for (const auto i : c10::irange((size_t)l_pad)) {
+ for (const auto i : c10::irange(l_pad)) {
auto pad_idx = pad.size() - ((i + 1) * 2);
auto new_dim = input_sizes[l_diff + i] + pad[pad_idx] + pad[pad_idx + 1];
TORCH_CHECK(new_dim >= 0, "The input size ", input_sizes[l_diff + i], ", plus negative padding ",
@@ -107,11 +107,6 @@ void resize_bytes_cpu(StorageImpl* storage, size_t size_bytes) {
storage->set_nbytes(size_bytes);
}

- // Call the sparse implementation in SparseTensor.cpp directly.
- // A dynamic dispatch here is NOT necessary, so I didn't put
- // this function in native_functions.yaml
- const Tensor& resize_as_sparse_(const Tensor& self, const Tensor& src);
-
// TODO(VitalyFedyunin): Move it to HTML docs.
//
// Strides of the output tensor of `resize_as_` operator is defined by input
@@ -145,12 +145,6 @@
#include <utility>
#include <vector>

- namespace at::native {
-
- AdvancedIndex make_info(Tensor self, IOptTensorListRef orig);
-
- } // namespace at::native
-
namespace at::meta {

TORCH_META_FUNC(gather)
@@ -73,7 +73,6 @@
#include <ATen/ops/where_native.h>
#include <ATen/ops/zeros_like.h>

- #include <iostream>
#include <utility>
#endif

@@ -124,7 +124,7 @@ struct IsUnique {};

template <typename scalar_t>
struct IsUnique<scalar_t, false> {
- inline bool operator() (scalar_t* data_ptr, int64_t i) {
+ bool operator() (scalar_t* data_ptr, int64_t i) {
if (i == 0) { return true; }
return c10::load(&data_ptr[i]) != c10::load(&data_ptr[i - 1]);
}
@@ -132,7 +132,7 @@ struct IsUnique<scalar_t, false> {

template <typename scalar_t>
struct IsUnique<scalar_t, true> {
- inline bool operator() (scalar_t* data_ptr, int64_t i) {
+ bool operator() (scalar_t* data_ptr, int64_t i) {
if (i == 0) { return true; }
return (c10::load(&data_ptr[i]) != c10::load(&data_ptr[i - 1]))
&& !(_isnan(data_ptr[i]) && _isnan(data_ptr[i - 1]));
@@ -17,7 +17,7 @@

namespace ao::sparse {

int register_linear_params();


#ifdef USE_FBGEMM

@@ -20,7 +20,7 @@

namespace ao::sparse {

int register_linear_params();


#ifdef USE_FBGEMM
namespace {
@@ -16,7 +16,7 @@
#endif

namespace ao::sparse {
int register_linear_params();


#ifdef USE_FBGEMM

@@ -1919,7 +1919,7 @@ Tensor& _mm_dtype_out_cuda(const Tensor& self, const Tensor& mat2, const at::Sca
TORCH_CHECK(out_dtype == out.scalar_type(), "out_dtype must be the same as the dtype of the provided out tensor");


- addmm_out_cuda_impl(const_cast<Tensor&>(out), out, self, mat2, 0, 1);
+ addmm_out_cuda_impl(out, out, self, mat2, 0, 1);

return out;
}
@@ -76,7 +76,6 @@ std::tuple<Tensor, Tensor> _cudnn_ctc_loss_tensor(

#else // AT_CUDNN_ENABLED

#include <ATen/cudnn/Descriptors.h>
#include <ATen/cudnn/Types.h>
#include <ATen/cudnn/Utils.h>

@@ -284,9 +283,9 @@ std::tuple<Tensor, Tensor> _cudnn_ctc_loss_tensor(
checkBackend(c, {*targets}, Backend::CUDA);
const auto batch_size = log_probs->size(1);
int64_t input_lengths_size =
- input_lengths_.sizes().size() ? input_lengths_.size(0) : 1;
+ !input_lengths_.sizes().empty() ? input_lengths_.size(0) : 1;
int64_t target_lengths_size =
- target_lengths_.sizes().size() ? target_lengths_.size(0) : 1;
+ !target_lengths_.sizes().empty() ? target_lengths_.size(0) : 1;
TORCH_CHECK(
input_lengths_size == batch_size,
"input_lengths needs to have size to match batch_size");
@@ -142,8 +142,6 @@ void run_cudnn_SDP_bprop_nestedtensor(
namespace at {
namespace native {

#include <cudnn_frontend.h>

namespace fe = cudnn_frontend;

constexpr uint8_t MAX_MHA_DIM = 4;
@@ -38,7 +38,6 @@ REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_transpose_backward_stub)

#include <ATen/native/mkldnn/MKLDNNCommon.h>
#include <ATen/native/mkldnn/Utils.h>
#include <ATen/native/ConvUtils.h>
#include <c10/util/irange.h>

namespace at::native {
@@ -105,7 +104,7 @@ static void check_shape_forward(const Tensor& input,
// If kernel size is incorrect
std::ostringstream input_ss;
std::ostringstream kernel_ss;
- std::string separator = "";
+ std::string separator;

for (int i = 0, len = input_shape.size(); i < len; ++i) {
input_ss << separator << input_shape[i];
@@ -316,7 +316,7 @@ Tensor NestedTensor_to_padded_tensor_generic(
TORCH_CHECK(
(int64_t)output_size_.size() == ret_val.dim(),
"Length of output_size does not match NestedTensor dims. Broadcasting is not supported.");
- for (int64_t i = 0; i < (int64_t)ret_val.dim(); i++) {
+ for (int64_t i = 0; i < ret_val.dim(); i++) {
TORCH_CHECK(
output_size_[i] >= ret_val.size(i),
"Value in output_size is less than NestedTensor padded size. Truncation is not supported.");
@@ -1198,7 +1198,7 @@ at::Tensor PackedConvWeightsOnednn<kSpatialDim>::apply_impl(
kSpatialDim == 2 ? ideep::format_tag::nhwc : ideep::format_tag::ndhwc);
ideep::tensor src(src_desc, act_contig.data_ptr());
// weights & bias
- ideep::tensor& weights = *(weight_.get());
+ ideep::tensor& weights = *(weight_);
bool with_bias = bias_.has_value();
const auto& kernel_size = weights.get_dims();
// dst
@@ -812,7 +812,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_impl(

auto is_input_qint8 = input.scalar_type() == c10::ScalarType::QInt8;
auto input_contig = input.expect_contiguous();
- auto& w = *(weight_.get());
+ auto& w = *weight_;
auto K = input.size(dim - 1), M = input.numel() / K, N = w.get_dim(1);
auto input_dims = {M, K};
auto input_data_type = is_input_qint8 ? dnnl::memory::data_type::s8 : dnnl::memory::data_type::u8;
@@ -545,7 +545,7 @@ at::Tensor PackedLinearWeightsOnednn::apply_dynamic_impl(
/*reduce_range=*/reduce_range);
const std::vector<int32_t>& src_zero_point = std::vector<int32_t>(1, q_params.zero_point);
// weights, dst
- auto w = *(weight_.get());
+ auto w = *weight_;
auto dst_dims = {x.get_dim(0), w.get_dim(1)};
const ideep::scale_t& src_scales = ideep::scale_t(1, 1.0/q_params.scale);
const ideep::scale_t& weights_scales = w.get_scale();
@@ -12,7 +12,6 @@
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
#include <c10/util/irange.h>
#include <torch/library.h>

#include <utility>

@@ -10,7 +10,6 @@
#include <ATen/quantized/Quantizer.h>
#include <c10/core/QScheme.h>
#include <c10/util/irange.h>
#include <torch/library.h>

int register_linear_params();

@@ -65,7 +65,7 @@ Tensor& addmv_out_sparse_compressed(
return result.zero_();
} else {
return at::mul_out(
- const_cast<Tensor&>(result),
+ result,
self,
at::native::scalar_tensor(
beta,
@@ -1330,18 +1330,18 @@ Tensor reduce_sparse_csr_cpu_template(const Tensor& sparse, IntArrayRef dims_to_

template <typename scalar_t>
struct ReductionAddOp {
- inline scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
+ scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
return a + b;
}
- inline scalar_t identity() const { return 0; }
+ scalar_t identity() const { return 0; }
};

template <typename scalar_t>
struct ReductionMulOp {
- inline scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
+ scalar_t operator()(const scalar_t& a, const scalar_t& b) const {
return a * b;
}
- inline scalar_t identity() const { return 1; }
+ scalar_t identity() const { return 1; }
};

} // namespace
@@ -55,7 +55,6 @@
#include <ATen/ops/is_pinned_native.h>
#include <ATen/ops/resize_as_sparse.h>
#include <ATen/ops/resize_as_sparse_native.h>
#include <ATen/ops/sparse_coo_tensor.h>
#include <ATen/ops/sparse_coo_tensor_native.h>
#include <ATen/ops/sparse_dim_native.h>
#include <ATen/ops/sparse_mask_native.h>
@@ -244,7 +244,7 @@ Tensor& addmv_out_sparse_compressed_cuda(
return result.zero_();
} else {
return at::mul_out(
- const_cast<Tensor&>(result),
+ result,
self,
at::native::scalar_tensor(
beta,
@@ -203,7 +203,7 @@ class LocalCallbackManager {
// Runtime cache.
size_t global_version_{GlobalCallbackManager::NoVersion};
std::array<CacheEntry, NumRecordScopes> active_callbacks_;
- std::mt19937 generator_{};
+ std::mt19937 generator_;
};

// ============================================================================
@@ -816,7 +816,7 @@ struct ExpandableSegment {
struct BlockState {
c10::DeviceIndex device = 0;
cudaStream_t stream = nullptr;
- stream_set stream_uses = {};
+ stream_set stream_uses;
size_t size = 0;
void* ptr = nullptr;
bool allocated = false;
@@ -1683,7 +1683,7 @@ class DeviceCachingAllocator {
cudaStreamCaptureStatus status{cudaStreamCaptureStatusNone};
};

- inline CaptureInfo stream_get_capture_info(cudaStream_t stream) {
+ CaptureInfo stream_get_capture_info(cudaStream_t stream) {
CaptureInfo info{};
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 13000)
C10_CUDA_CHECK(cudaStreamGetCaptureInfo(
@@ -1997,7 +1997,7 @@ class DeviceCachingAllocator {
ss.put(SHAREABLE_CUDA_EXPANDABLE_SEGMENT);
auto full_range = block->expandable_segment_->share(
SegmentRange(block->ptr, block->size), ss);
- offset = (char*)block->ptr - (char*)full_range.ptr;
+ offset = (char*)block->ptr - full_range.ptr;
}
return ShareableHandle{offset, ss.str()};
}
@@ -3384,7 +3384,7 @@ class DeviceCachingAllocator {
if (pool->owner_PrivatePool && pool->owner_PrivatePool->allocator()) {
// If there is an active mempool with a given allocator,
// we use the given allocator's delete function.
- pool->owner_PrivatePool->allocator()->raw_delete((void*)block->ptr);
+ pool->owner_PrivatePool->allocator()->raw_delete(block->ptr);
} else {
C10_CUDA_CHECK(cudaFree((void*)block->ptr));
}
@@ -3423,8 +3423,7 @@ class DeviceCachingAllocator {
}
block->pool->blocks.erase(block);

- ptrdiff_t before_size =
- static_cast<char*>(unmapped.ptr) - static_cast<char*>(block->ptr);
+ ptrdiff_t before_size = unmapped.ptr - static_cast<char*>(block->ptr);
if (before_size > 0) {
// prev? -> before_free -> block
Block* before_free = new Block(
@@ -3442,7 +3441,7 @@ class DeviceCachingAllocator {
block->stream,
after_size,
block->pool,
- static_cast<char*>(unmapped.ptr) + unmapped.size);
+ unmapped.ptr + unmapped.size);
after_free->expandable_segment_ = block->expandable_segment_;
after_free->splice(block, block->next);
block->pool->insert_into_blocks(after_free);
@@ -3832,7 +3831,7 @@ class NativeCachingAllocator : public CUDAAllocator {
": did you call init?");
Block* block = device_allocator[device]->malloc(device, size, stream);
add_allocated_block(block);
- *devPtr = (void*)block->ptr;
+ *devPtr = block->ptr;
const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
if (C10_UNLIKELY(interp)) {
(*interp)->trace_gpu_memory_allocation(
@@ -446,7 +446,7 @@ struct CudaMallocAsyncAllocator : public CUDAAllocator {
return !devs_initialized_flags.empty();
}

- static inline void assertValidDevice(c10::DeviceIndex device) {
+ static void assertValidDevice(c10::DeviceIndex device) {
TORCH_CHECK(
0 <= device && device < device_count, "Invalid device argument.");
}
@@ -18,7 +18,7 @@
static PyObject* THPUpperModuleOfDevice = nullptr;

PyObject* THPDevice_New(const at::Device& device) {
- auto type = (PyTypeObject*)&THPDeviceType;
+ auto type = &THPDeviceType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();
@@ -15,7 +15,7 @@
PyObject* THPDtype_New(at::ScalarType scalar_type, const std::string& name) {
HANDLE_TH_ERRORS
AT_ASSERT(name.length() < DTYPE_NAME_LEN);
- auto type = (PyTypeObject*)&THPDtypeType;
+ auto type = &THPDtypeType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();
@@ -69,7 +69,7 @@ static PyObject* THPEvent_pynew(
}

PyObject* THPEvent_new(c10::DeviceType device_type, c10::EventFlag flag) {
- auto type = (PyTypeObject*)&THPEventType;
+ auto type = &THPEventType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
TORCH_CHECK(self, "Failed to allocate memory for Event");
auto self_ = reinterpret_cast<THPEvent*>(self.get());
@@ -11,7 +11,7 @@
#include <string>

PyObject* THPLayout_New(at::Layout layout, const std::string& name) {
- auto type = (PyTypeObject*)&THPLayoutType;
+ auto type = &THPLayoutType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();
@@ -13,7 +13,7 @@
PyObject* THPMemoryFormat_New(
at::MemoryFormat memory_format,
const std::string& name) {
- auto type = (PyTypeObject*)&THPMemoryFormatType;
+ auto type = &THPMemoryFormatType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();
@@ -492,7 +492,7 @@ static PyObject* THPModule_addDocStr(PyObject* _unused, PyObject* args) {

static PyObject* THPModule_inferSize(PyObject* _unused, PyObject* args) {
HANDLE_TH_ERRORS
- Py_ssize_t num_args = args ? (Py_ssize_t)PyTuple_Size(args) : 0;
+ Py_ssize_t num_args = args ? PyTuple_Size(args) : 0;
TORCH_CHECK(num_args == 2, "expected exactly 2 arguments");
PyObject* arg1 = PyTuple_GET_ITEM(args, 0);
TORCH_CHECK(THPSize_Check(arg1), "expected a torch.Size as argument 1");
@@ -11,7 +11,7 @@
#include <string>

PyObject* THPQScheme_New(at::QScheme qscheme, const std::string& name) {
- auto type = (PyTypeObject*)&THPQSchemeType;
+ auto type = &THPQSchemeType;
auto self = THPObjectPtr{type->tp_alloc(type, 0)};
if (!self)
throw python_error();
@@ -482,7 +482,7 @@ static PyObject* THPStorage_setFromFile(PyObject* self, PyObject* args) {
return nullptr;
}
Py_INCREF(self);
- return (PyObject*)self;
+ return self;
}

// file is backed by a fd
@@ -102,7 +102,7 @@ static PyObject* THPStream_pynew(

PyObject* THPStream_Wrap(const c10::Stream& stream) {
HANDLE_TH_ERRORS
- auto type = (PyTypeObject*)THPStreamClass;
+ auto type = THPStreamClass;
THPObjectPtr ptr(type->tp_alloc(type, 0));
if (!ptr) {
throw python_error();
@@ -18,7 +18,7 @@
#include <sstream>

static PyObject* THPFInfo_New(const at::ScalarType& type) {
- auto finfo = (PyTypeObject*)&THPFInfoType;
+ auto finfo = &THPFInfoType;
auto self = THPObjectPtr{finfo->tp_alloc(finfo, 0)};
if (!self)
throw python_error();
@@ -28,7 +28,7 @@ static PyObject* THPFInfo_New(const at::ScalarType& type) {
}

static PyObject* THPIInfo_New(const at::ScalarType& type) {
- auto iinfo = (PyTypeObject*)&THPIInfoType;
+ auto iinfo = &THPIInfoType;
auto self = THPObjectPtr{iinfo->tp_alloc(iinfo, 0)};
if (!self)
throw python_error();
@@ -103,9 +103,9 @@ variable_list CrossMapLRN2d::backward(
double cache_ratio_value = 2 * ctx->saved_data["alpha"].toDouble() *
ctx->saved_data["beta"].toDouble() /
static_cast<double>(ctx->saved_data["size"].toInt());
- int64_t inversePrePad = static_cast<int64_t>(
- ctx->saved_data["size"].toInt() -
- (ctx->saved_data["size"].toInt() - 1) / 2);
+ int64_t inversePrePad =
+ (ctx->saved_data["size"].toInt() -
+ (ctx->saved_data["size"].toInt() - 1) / 2);

grad_input.resize_as_(input);
torch::pow_out(
@@ -2176,7 +2176,7 @@ Tensor _nested_split_with_sizes_backward(
const Tensor& nt_sizes,
const at::TensorOptions& options) {
// add 1 to account for batch dim
- dim = at::maybe_wrap_dim(dim, static_cast<int64_t>(nt_sizes.size(1)) + 1);
+ dim = at::maybe_wrap_dim(dim, nt_sizes.size(1) + 1);
// it's possible some of the grads are not defined (represents tensors of all
// 0s). Since at::cat can't handle those, let's define them
std::vector<Tensor> grads_all_defined;
@@ -2187,10 +2187,9 @@ Tensor _nested_split_with_sizes_backward(
const auto& length = split_sizes[i].guard_int(__FILE__, __LINE__);
auto nt_split_size = nt_sizes.clone();
auto nt_split_size_ptr = nt_split_size.data_ptr<int64_t>();
- for (int64_t j : c10::irange(static_cast<int64_t>(nt_sizes.size(0)))) {
+ for (int64_t j : c10::irange(nt_sizes.size(0))) {
// subtract 1 to account for batch dim
- nt_split_size_ptr
- [j * static_cast<int64_t>(nt_sizes.size(1)) + (dim - 1)] = length;
+ nt_split_size_ptr[j * nt_sizes.size(1) + (dim - 1)] = length;
}
Tensor zeros_buffer = at::zeros(
{at::native::get_numel_from_nested_size_tensor(nt_split_size)},
@@ -47,7 +47,7 @@ void gds_load_storage(
const size_t nbytes = storage.nbytes();

// Read the binary file
- ssize_t ret = cuFileRead(cf_handle, (void*)dataPtr, nbytes, offset, 0);
+ ssize_t ret = cuFileRead(cf_handle, dataPtr, nbytes, offset, 0);
TORCH_CHECK(ret >= 0, "cuFileRead failed: ", cuGDSFileGetErrorString(ret));
}

@@ -303,7 +303,7 @@ at::Scalar as_scalar(PyObject* arg) {
}

if (THPUtils_checkLong(arg)) {
- return at::Scalar(static_cast<int64_t>(THPUtils_unpackLong(arg)));
+ return at::Scalar(THPUtils_unpackLong(arg));
}

if (PyBool_Check(arg)) {
@@ -735,8 +735,7 @@ PyObject* THCPModule_memorySnapshot(PyObject* _unused, PyObject* arg) {
"mempool_id elements must be integers");

mempool_id = c10::cuda::MempoolId_t(
- static_cast<int64_t>(THPUtils_unpackLong(id1)),
- static_cast<int64_t>(THPUtils_unpackLong(id2)));
+ THPUtils_unpackLong(id1), THPUtils_unpackLong(id2));
}

using c10::cuda::CUDACachingAllocator::BlockInfo;
@@ -52,13 +52,9 @@ DistAutogradContainer& DistAutogradContainer::init(int64_t worker_id) {
}

container.worker_id_ = static_cast<int16_t>(worker_id);
- container.next_context_id_ = static_cast<int64_t>(worker_id)
- << kAutoIncrementBits;
- container.next_autograd_message_id_ = static_cast<int64_t>(worker_id)
- << kAutoIncrementBits;
- container.max_id_ =
- (kAutoIncrementMask |
- (static_cast<int64_t>(worker_id) << kAutoIncrementBits));
+ container.next_context_id_ = worker_id << kAutoIncrementBits;
+ container.next_autograd_message_id_ = worker_id << kAutoIncrementBits;
+ container.max_id_ = (kAutoIncrementMask | (worker_id << kAutoIncrementBits));
container.initialized_ = true;
return container;
}
@@ -828,7 +828,7 @@ class AsyncBroadcastWork : public ProcessGroupGloo::AsyncWork {
rootTensor(rootTensor),
tag(tag) {}

- std::vector<at::Tensor> inputs{};
+ std::vector<at::Tensor> inputs;
const int rootRank;
const int rootTensor;
const uint32_t tag;
@@ -924,8 +924,8 @@ class AsyncBroadcastCUDAWork : public AsyncBroadcastWork {
}

at::Tensor tmp;
- std::vector<c10::Stream> streams{};
- std::vector<c10::Event> events{};
+ std::vector<c10::Stream> streams;
+ std::vector<c10::Event> events;
};

} // namespace
@@ -1160,7 +1160,7 @@ class AsyncReduceWork : public ProcessGroupGloo::AsyncWork {
reduceOp(std::move(reduceOp)),
tag(tag) {}

- std::vector<at::Tensor> inputs{};
+ std::vector<at::Tensor> inputs;
const int rootRank;
const int rootTensor;
const ReduceOp reduceOp;
@@ -1276,9 +1276,9 @@ class AsyncReduceCUDAWork : public AsyncReduceWork {
}
}

- std::vector<at::Tensor> tmp{};
- std::vector<c10::Stream> streams{};
- std::vector<c10::Event> events{};
+ std::vector<at::Tensor> tmp;
+ std::vector<c10::Stream> streams;
+ std::vector<c10::Event> events;
};

} // namespace
@@ -1362,8 +1362,8 @@ class AsyncAllgatherWork : public ProcessGroupGloo::AsyncWork {
inputs(inputs),
tag(tag) {}

- std::vector<std::vector<at::Tensor>> outputs{};
- std::vector<at::Tensor> inputs{};
+ std::vector<std::vector<at::Tensor>> outputs;
+ std::vector<at::Tensor> inputs;
const uint32_t tag;

void allgather(
@@ -1472,13 +1472,13 @@ class AsyncAllgatherCUDAWork : public AsyncAllgatherWork {
}
}

- std::vector<at::Tensor> tmpInputs{};
- std::vector<c10::Stream> inputStreams{};
- std::vector<c10::Event> inputEvents{};
+ std::vector<at::Tensor> tmpInputs;
+ std::vector<c10::Stream> inputStreams;
+ std::vector<c10::Event> inputEvents;

- std::vector<std::vector<at::Tensor>> tmpOutputs{};
- std::vector<c10::Stream> outputStreams{};
- std::vector<c10::Event> outputEvents{};
+ std::vector<std::vector<at::Tensor>> tmpOutputs;
+ std::vector<c10::Stream> outputStreams;
+ std::vector<c10::Event> outputEvents;
};

// A work that takes an lambda on construction and calls it on wait.
@@ -1647,8 +1647,8 @@ class AsyncAllgatherCoalescedWork : public ProcessGroupGloo::AsyncWork {
input_list(input_list),
tag(tag) {}

- std::vector<std::vector<at::Tensor>> output_lists{};
- std::vector<at::Tensor> input_list{};
+ std::vector<std::vector<at::Tensor>> output_lists;
+ std::vector<at::Tensor> input_list;
const uint32_t tag;

void allgather_coalesced() {
@@ -1801,8 +1801,8 @@ class AsyncGatherWork : public ProcessGroupGloo::AsyncWork {
root(root),
tag(tag) {}

- std::vector<std::vector<at::Tensor>> outputs{};
- std::vector<at::Tensor> inputs{};
+ std::vector<std::vector<at::Tensor>> outputs;
+ std::vector<at::Tensor> inputs;
const int root;
const uint32_t tag;

@@ -1920,13 +1920,13 @@ class AsyncGatherCUDAWork : public AsyncGatherWork {
}
}

- std::vector<at::Tensor> tmpInputs{};
- std::vector<c10::Stream> inputStreams{};
- std::vector<c10::Event> inputEvents{};
+ std::vector<at::Tensor> tmpInputs;
+ std::vector<c10::Stream> inputStreams;
+ std::vector<c10::Event> inputEvents;

- std::vector<std::vector<at::Tensor>> tmpOutputs{};
- std::vector<c10::Stream> outputStreams{};
- std::vector<c10::Event> outputEvents{};
+ std::vector<std::vector<at::Tensor>> tmpOutputs;
+ std::vector<c10::Stream> outputStreams;
+ std::vector<c10::Event> outputEvents;
};

} // namespace
@@ -2033,8 +2033,8 @@ class AsyncScatterWork : public ProcessGroupGloo::AsyncWork {
root(root),
tag(tag) {}

- std::vector<at::Tensor> outputs{};
- std::vector<std::vector<at::Tensor>> inputs{};
+ std::vector<at::Tensor> outputs;
+ std::vector<std::vector<at::Tensor>> inputs;
const int root;
const uint32_t tag;

@@ -2134,13 +2134,13 @@ class AsyncScatterCUDAWork : public AsyncScatterWork {
}
}

- std::vector<at::Tensor> tmpOutputs{};
- std::vector<c10::Stream> outputStreams{};
- std::vector<c10::Event> outputEvents{};
+ std::vector<at::Tensor> tmpOutputs;
+ std::vector<c10::Stream> outputStreams;
+ std::vector<c10::Event> outputEvents;

- std::vector<std::vector<at::Tensor>> tmpInputs{};
- std::vector<c10::Stream> inputStreams{};
- std::vector<c10::Event> inputEvents{};
+ std::vector<std::vector<at::Tensor>> tmpInputs;
+ std::vector<c10::Stream> inputStreams;
+ std::vector<c10::Event> inputEvents;
};

} // namespace
@@ -2294,8 +2294,8 @@ class AsyncAlltoallWork : public ProcessGroupGloo::AsyncWork {

at::Tensor outputTensor;
at::Tensor inputTensor;
- std::vector<int64_t> outputCounts{};
- std::vector<int64_t> inputCounts{};
+ std::vector<int64_t> outputCounts;
+ std::vector<int64_t> inputCounts;
const uint32_t tag;

void alltoall(at::Tensor& outputTensor, at::Tensor& inputTensor) {
@@ -2397,12 +2397,12 @@ class AsyncAlltoallCUDAWork : public AsyncAlltoallWork {
}

at::Tensor cpuOutput;
- std::vector<c10::Stream> outputStreams{};
- std::vector<c10::Event> outputEvents{};
+ std::vector<c10::Stream> outputStreams;
+ std::vector<c10::Event> outputEvents;

at::Tensor cpuInput;
- std::vector<c10::Stream> inputStreams{};
- std::vector<c10::Event> inputEvents{};
+ std::vector<c10::Stream> inputStreams;
+ std::vector<c10::Event> inputEvents;
};

} // namespace
@@ -2576,9 +2576,9 @@ class AsyncBarrierWork : public ProcessGroupGloo::AsyncWork {
priorWork(std::move(priorWork)),
tag(tag) {}

- std::vector<c10::weak_intrusive_ptr<AsyncWork>> priorWork{};
+ std::vector<c10::weak_intrusive_ptr<AsyncWork>> priorWork;
const uint32_t tag;
- std::vector<at::Tensor> inputs{};
+ std::vector<at::Tensor> inputs;

const std::vector<at::Tensor> getInputTensors() override {
return inputs;
@@ -126,8 +126,8 @@ class AsyncAllreduceCUDAHostWork : public AsyncAllreduceWork {
}

std::vector<at::Tensor> tmp;
- std::vector<c10::Stream> streams{};
- std::vector<c10::Event> events{};
+ std::vector<c10::Stream> streams;
+ std::vector<c10::Event> events;
};

class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
@@ -180,9 +180,9 @@ class AsyncSparseAllreduceCUDAWork : public AsyncSparseAllreduceWork {
}
}

- std::vector<at::Tensor> tmp{};
- std::vector<c10::Stream> streams{};
- std::vector<c10::Event> events{};
+ std::vector<at::Tensor> tmp;
+ std::vector<c10::Stream> streams;
+ std::vector<c10::Event> events;
};

static c10::intrusive_ptr<ProcessGroupGloo::AsyncWork> makeAllreduceCUDAWork(
@@ -96,7 +96,7 @@ class TCPStoreMasterDaemon : public BackgroundThread {
std::unordered_set<int> miscellaneousSockets_;

Socket storeListenSocket_;
- std::vector<Socket> sockets_{};
+ std::vector<Socket> sockets_;
#ifdef _WIN32
const std::chrono::milliseconds checkTimeout_ = std::chrono::milliseconds{10};
HANDLE ghStopEvent_{};
@@ -49,8 +49,8 @@ class HandlerRegistry {
}

private:
- std::shared_mutex handlersMutex_{};
- std::unordered_map<std::string, HandlerFunc> handlers_{};
+ std::shared_mutex handlersMutex_;
+ std::unordered_map<std::string, HandlerFunc> handlers_;
};

HandlerRegistry& getHandlerRegistry() {
@@ -375,8 +375,7 @@ void Reducer::mark_variable_ready_dense(size_t variable_index) {
// previous iterations, no copy is needed.
if (!grad.is_alias_of(bucket_view)) {
if (comm_hook_ == nullptr) {
- auto wrapped =
- at::native::wrapped_scalar_tensor(double(1.) / div_factor_);
+ auto wrapped = at::native::wrapped_scalar_tensor(1. / div_factor_);
if (!grad.requires_grad()) {
// Divides while copying into the bucket view to save one scan over
// all the input parameters.
@@ -532,8 +532,8 @@ class SocketListenOp {

std::string port_;
const SocketOptions* opts_;
- std::vector<std::string> errors_{};
- std::unique_ptr<SocketImpl> socket_{};
+ std::vector<std::string> errors_;
+ std::unique_ptr<SocketImpl> socket_;
};

SocketListenOp::SocketListenOp(std::uint16_t port, const SocketOptions& opts)
@@ -772,9 +772,9 @@ class SocketConnectOp {
const char* host_;
std::string port_;
const SocketOptions* opts_;
- TimePoint deadline_{};
- std::vector<std::string> errors_{};
- std::unique_ptr<SocketImpl> socket_{};
+ TimePoint deadline_;
+ std::vector<std::string> errors_;
+ std::unique_ptr<SocketImpl> socket_;
};

SocketConnectOp::SocketConnectOp(
@@ -200,7 +200,7 @@ int IpcChannel::broadcast_fds(
int world_size = (int)pids.size();

if (rank == src_rank) {
- for (int dst_rank = 0; dst_rank < (int)world_size; ++dst_rank) {
+ for (int dst_rank = 0; dst_rank < world_size; ++dst_rank) {
if (dst_rank == rank) {
continue;
}
@@ -242,7 +242,7 @@ void map_block(
CUmemAccessDesc desc;
desc.location.type = CU_MEM_LOCATION_TYPE_DEVICE;
// NOLINTNEXTLINE(bugprone-signed-char-misuse)
- desc.location.id = static_cast<int>(device_idx);
+ desc.location.id = device_idx;
desc.flags = CU_MEM_ACCESS_FLAGS_PROT_READWRITE;
C10_CUDA_DRIVER_CHECK(driver_api->cuMemSetAccess_(*dev_ptr, size, &desc, 1));
#elif defined(USE_ROCM)
@@ -622,7 +622,7 @@ struct AutocastState {
struct GlobalStateGuard {
PyObject_HEAD

- inline void init() {
+ void init() {
auto& ctx = at::globalContext();
_grad_mode = at::GradMode::is_enabled();
_autocast_state = AutocastState();
@@ -643,7 +643,7 @@ struct GlobalStateGuard {
_default_dtype = at::get_default_dtype();
}

- inline bool check() const {
+ bool check() const {
auto& ctx = at::globalContext();
return (_grad_mode == at::GradMode::is_enabled() &&
_autocast_state == AutocastState() &&
@@ -663,7 +663,7 @@ struct GlobalStateGuard {
_default_dtype == at::get_default_dtype();
}

- inline std::string reason() const {
+ std::string reason() const {
std::ostringstream os;
auto& ctx = at::globalContext();
if (_grad_mode != at::GradMode::is_enabled())
@@ -418,7 +418,7 @@ void OSSProxyExecutor::get_input_info_from_serialized(
const auto& arg = named_argument["arg"];
const auto& name = named_argument["name"].get<std::string>();

- std::string custom_obj_name = "";
+ std::string custom_obj_name;
if (arg.contains("as_custom_obj")) {
custom_obj_name = arg["as_custom_obj"]["name"].get<std::string>();
}
@@ -1374,11 +1374,8 @@ void aoti_torch_warn(
const char* file,
uint32_t line,
const char* msg) {
- ::c10::warn(::c10::Warning(
- ::c10::UserWarning(),
- {func, file, static_cast<uint32_t>(line)},
- msg,
- false));
+ ::c10::warn(
+ ::c10::Warning(::c10::UserWarning(), {func, file, line}, msg, false));
}

AOTITorchError aoti_torch__alloc_from_pool(
@@ -37,8 +37,8 @@ class EventHandlers {
}

private:
- std::mutex mu_{};
- std::vector<std::shared_ptr<EventHandler>> handlers_{};
+ std::mutex mu_;
+ std::vector<std::shared_ptr<EventHandler>> handlers_;
};
} // namespace

@@ -109,15 +109,15 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT
using ID = size_t;

// Mapping of each thread to its own operator stack
- std::map<size_t, std::stack<ID>> opStack{};
+ std::map<size_t, std::stack<ID>> opStack;
// Uses the underlying TensorImpl object pointer as the key and map to its
// unique id.
- std::map<const void*, ID> objectId{};
+ std::map<const void*, ID> objectId;

using weak_storage_ptr = c10::weak_intrusive_ptr<StorageImpl>;
- std::unordered_map<const void*, ID> data_ptr_to_storage_id{};
+ std::unordered_map<const void*, ID> data_ptr_to_storage_id;
std::unordered_map<const void*, weak_storage_ptr>
- data_ptr_to_weak_storage_ptr{};
+ data_ptr_to_weak_storage_ptr;

ID get_tensor_storage_ID(const c10::Storage& t_storage) {
const std::lock_guard<std::recursive_mutex> lock(gMutex);
@@ -152,21 +152,21 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT
enum class RunState { uninitialized, disabled, enabled };

// Mutex for multithreaded access to the shared containers.
- std::recursive_mutex gMutex{};
+ std::recursive_mutex gMutex;
// Stream to write output JSON.
- std::ofstream out{};
+ std::ofstream out;

// Full path to the output file.
- std::string fileName{};
+ std::string fileName;

- std::string resourceDir{};
+ std::string resourceDir;

// RecordFunction callback handle for this observer.
CallbackHandle cbHandle{INVALID_CALLBACK_HANDLE};

// Process ID.
int32_t pid{-1};
- std::string recordTime{};
+ std::string recordTime;

ExecutionTraceObserver() = default;

@@ -193,7 +193,7 @@ struct TORCH_API ExecutionTraceObserver { // NOLINT

bool record_integral_tensor_range{false};

- std::unordered_set<std::string> nodeListForSavingIntegerTensor{};
+ std::unordered_set<std::string> nodeListForSavingIntegerTensor;

private:
static bool callbackShouldBeEnabled(RunState run_state) {