Compare commits

...

3 Commits

Author   SHA1        Message                                   Date
cyy      67e094992e  Revert changes                            2025-09-21 09:20:22 +00:00
cyy      14622ff376  More fixes                                2025-09-21 09:20:22 +00:00
cyy      35d5ec3d85  Fix clang-tidy warnings of performance    2025-09-21 09:20:22 +00:00
41 changed files with 114 additions and 103 deletions
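
For context (this note and the sketch below are not part of the original compare view): most of the hunks that follow appear to address clang-tidy's performance checks, chiefly performance-unnecessary-value-param (take read-only, expensive-to-copy arguments by const reference) and performance-move-const-arg (drop std::move where a move is just a copy). A minimal, hypothetical illustration of both patterns:

    #include <string>
    #include <utility>
    #include <vector>

    struct Registry {
      // Flagged by performance-unnecessary-value-param: the argument is only read,
      // so copying it at every call site is wasted work.
      //   void add(std::string name) { names_.push_back(name); }

      // Preferred: bind to a const reference; the single necessary copy happens inside.
      void add(const std::string& name) { names_.push_back(name); }

      std::vector<std::string> names_;
    };

    // performance-move-const-arg: std::move on a trivially copyable type
    // (int, enums, DispatchKeySet, std::type_index, ...) degenerates to a copy,
    // so the cast only obscures the code.
    int identity(int x) { return x; }  // returning std::move(x) here would change nothing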

View File

@ -94,10 +94,10 @@ inline at::DimVector infer_size_dv(IntArrayRef shape, int64_t numel) {
inline at::SymDimVector infer_size_dv(
c10::SymIntArrayRef shape,
c10::SymInt numel) {
const c10::SymInt& numel) {
auto res = at::SymDimVector(shape);
infer_size_impl<c10::SymIntArrayRef, c10::SymInt, at::SymDimVector>(
shape, std::move(numel), res);
shape, numel, res);
return res;
}

View File

@ -6,7 +6,6 @@
#include <c10/util/TypeList.h>
#include <c10/util/intrusive_ptr.h>
#include <c10/util/order_preserving_flat_hash_map.h>
#include <optional>
#include <ATen/core/TensorBody.h>
#include <ATen/core/jit_type_base.h>

View File

@ -55,8 +55,7 @@ class TORCH_API CppSignature final {
}
private:
explicit CppSignature(std::type_index signature)
: signature_(std::move(signature)) {}
explicit CppSignature(std::type_index signature) : signature_(signature) {}
std::type_index signature_;
};

View File

@ -70,7 +70,7 @@ private:
void _print_dispatch_trace(const std::string& label, const std::string& op_name, const DispatchKeySet& dispatchKeySet) {
auto nesting_value = dispatch_trace_nesting_value();
for (int64_t i = 0; i < nesting_value; ++i) std::cerr << " ";
std::cerr << label << " op=[" << op_name << "], key=[" << toString(dispatchKeySet.highestPriorityTypeId()) << "]" << std::endl;
std::cerr << label << " op=[" << op_name << "], key=[" << toString(dispatchKeySet.highestPriorityTypeId()) << "]" << '\n';
}
} // namespace detail
@ -213,9 +213,11 @@ OperatorHandle Dispatcher::findOrRegisterName_(const OperatorName& op_name) {
// Windows build doesn't produce the destructor symbol in PyTorch libs
// causing a linker failure in downstream projects.
// x-ref https://github.com/pytorch/pytorch/issues/70032
#if defined(_WIN32)
OperatorHandle::~OperatorHandle() = default;
#endif
RegistrationHandleRAII Dispatcher::registerLibrary(std::string ns, std::string debug) {
RegistrationHandleRAII Dispatcher::registerLibrary(const std::string& ns, std::string debug) {
std::lock_guard<std::mutex> lock(guard_->mutex);
auto found = libraries_.find(ns);
TORCH_CHECK(
@ -306,7 +308,7 @@ PythonModuleMapType& pythonModulesSingleton() {
}
std::optional<std::pair<const char*, const char*>> Dispatcher::getPyStub(OperatorName op_name) {
std::optional<std::pair<const char*, const char*>> Dispatcher::getPyStub(const OperatorName& op_name) {
std::lock_guard<std::mutex> lock(guard_->mutex);
auto found = pythonModulesSingleton().find(op_name);
if (found == pythonModulesSingleton().end()) {
@ -342,7 +344,7 @@ RegistrationHandleRAII Dispatcher::registerPythonModule(
});
}
void Dispatcher::throwIfHasPythonModule(OperatorName op_name) {
void Dispatcher::throwIfHasPythonModule(const OperatorName& op_name) {
std::lock_guard<std::mutex> lock(guard_->mutex);
auto elt = pythonModulesSingleton().find(op_name);
if (elt == pythonModulesSingleton().end()) {
@ -362,7 +364,7 @@ void Dispatcher::throwIfHasPythonModule(OperatorName op_name) {
}
RegistrationHandleRAII Dispatcher::registerImpl(
OperatorName op_name,
const OperatorName& op_name,
std::optional<DispatchKey> dispatch_key,
KernelFunction kernel,
std::optional<impl::CppSignature> cpp_signature,
@ -377,7 +379,7 @@ RegistrationHandleRAII Dispatcher::registerImpl(
*this,
dispatch_key,
std::move(kernel),
std::move(cpp_signature),
cpp_signature,
std::move(inferred_function_schema),
std::move(debug)
);
@ -406,7 +408,7 @@ void Dispatcher::deregisterImpl_(const OperatorHandle& op, const OperatorName& o
cleanup(op, op_name);
}
RegistrationHandleRAII Dispatcher::registerName(OperatorName op_name) {
RegistrationHandleRAII Dispatcher::registerName(const OperatorName& op_name) {
std::lock_guard<std::mutex> lock(guard_->mutex);
auto op = findOrRegisterName_(op_name);
++op.operatorDef_->def_and_impl_count;
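
Not part of the diff: the std::endl to '\n' replacement above matches clang-tidy's performance-avoid-endl check; std::endl writes a newline and then requests a flush, whereas '\n' leaves flushing to the stream. A minimal sketch:

    #include <iostream>
    #include <string>

    void log_line(const std::string& msg) {
      std::cerr << msg << '\n';           // same output, no explicit flush requested
      // std::cerr << msg << std::endl;   // flagged: '\n' plus a forced flush
    }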

View File

@ -13,15 +13,10 @@
#include <condition_variable>
#include <list>
#include <mutex>
#include <type_traits>
#include <ATen/core/enum_tag.h>
#include <ATen/core/grad_mode.h>
#ifndef NDEBUG
#include <iostream>
#endif
namespace c10 {
TORCH_API bool show_dispatch_trace();
@ -255,7 +250,7 @@ class TORCH_API Dispatcher final {
// NB: steals the inferred function schema, as we may need to hold on to
// it for a bit until the real schema turns up
RegistrationHandleRAII registerImpl(
OperatorName op_name,
const OperatorName& op_name,
std::optional<DispatchKey> dispatch_key,
KernelFunction kernel,
std::optional<impl::CppSignature> cpp_signature,
@ -274,15 +269,15 @@ class TORCH_API Dispatcher final {
/**
* Given an operator, throws if we have a pystub.
*/
void throwIfHasPythonModule(OperatorName op_name);
void throwIfHasPythonModule(const OperatorName& op_name);
std::optional<std::pair<const char*, const char*>> getPyStub(
OperatorName op_name);
const OperatorName& op_name);
/**
* Register a new operator by name.
*/
RegistrationHandleRAII registerName(OperatorName op_name);
RegistrationHandleRAII registerName(const OperatorName& op_name);
/**
* Register a fallback kernel for a backend.
@ -300,7 +295,9 @@ class TORCH_API Dispatcher final {
* API. These invocations are only permitted once per program, so we raise
* an error if this is called again for the same namespace.
*/
RegistrationHandleRAII registerLibrary(std::string ns, std::string debug);
RegistrationHandleRAII registerLibrary(
const std::string& ns,
std::string debug);
// ------------------------------------------------------------------------
//
@ -448,8 +445,12 @@ class TORCH_API OperatorHandle {
OperatorHandle& operator=(OperatorHandle&&) noexcept = default;
OperatorHandle(const OperatorHandle&) = default;
OperatorHandle& operator=(const OperatorHandle&) = default;
#if defined(_WIN32)
// NOLINTNEXTLINE(performance-trivially-destructible)
~OperatorHandle();
#else
~OperatorHandle() = default;
#endif
const OperatorName& operator_name() const {
return operatorDef_->op.operator_name();

View File

@ -556,7 +556,7 @@ inline std::ostream& operator<<(std::ostream& out, const Argument& arg) {
// real_type versus fake_type: in order to be compatible with FunctionSchema
// parser, printing an argument with either MemoryFormat or Layout type should
// give us the original schema string, hence printing out real_type.
auto type = arg.real_type();
const auto& type = arg.real_type();
bool is_opt = type->kind() == OptionalType::Kind;
auto unopt_type = is_opt ? type->castRaw<OptionalType>()->getElementType() : type;

View File

@ -232,7 +232,7 @@ struct TORCH_API OptionalType : public UnionType {
static TypePtr ofTensor();
//
// global singleton
static TypePtr get(TypePtr inner);
static TypePtr get(const TypePtr& inner);
private:
explicit OptionalType(const TypePtr& contained);
@ -895,7 +895,7 @@ struct TORCH_API ListType
// the type List<T>.
// The extra "identifier" argument is needed beccause we have multiple container types
// that all re-use this function (List<T>, array<T, N>, etc.)
static TypePtr get(const std::string& identifier, TypePtr inner);
static TypePtr get(const std::string& identifier, const TypePtr& inner);
// common cast List[Tensor]
static ListTypePtr ofTensors();

View File

@ -274,7 +274,7 @@ ListTypePtr ListType::ofNumbers() {
return value;
}
TypePtr OptionalType::get(TypePtr inner) {
TypePtr OptionalType::get(const TypePtr& inner) {
static ska::flat_hash_map<TypePtr, TypePtr> containerTypePtrs;
static std::mutex mutex;
// Perf from the lock is ok because this function is guarded behind
@ -287,7 +287,7 @@ TypePtr OptionalType::get(TypePtr inner) {
return containerTypePtrs[inner];
}
TypePtr ListType::get(const std::string& identifier, TypePtr inner) {
TypePtr ListType::get(const std::string& identifier, const TypePtr& inner) {
static ska::flat_hash_map<std::tuple<std::string, TypePtr>, TypePtr> containerTypePtrs;
static std::mutex mutex;
// Perf from the lock is ok because this function is guarded behind

View File

@ -122,7 +122,7 @@ struct DeviceThreadHandlePool : public std::enable_shared_from_this<DeviceThread
// Called by the destructor. Releases this thread's handles back into the pool.
void release() {
if(my_handles.size() > 0) {
if(!my_handles.empty()) {
auto parent = weak_parent.lock();
if (!parent) {
// If this thread exits after atexit handlers have completed, the

View File

@ -139,7 +139,7 @@ static void autogradBasedTransformSendToNext(
std::bitset<default_bitset_size> outputs_aliasing_immutable; // set = 1 for all bits
if(!grad_special_case) {
for (auto idx = stack->size() - args_size; idx < stack->size(); idx++) {
const auto ivalue = (*stack)[idx];
const auto& ivalue = (*stack)[idx];
if (!ivalue.isTensor()) {
continue; // only input that can be aliased is a tensor, not a tensor list (expect in ops without returns)
}

View File

@ -6,6 +6,8 @@
#include <ATen/functorch/BatchRulesHelper.h>
#include <algorithm>
namespace at::functorch {
typedef std::tuple<Tensor, std::optional<int64_t>> oneOutput;
@ -315,7 +317,7 @@ oneOutput linalg_lu_solve_batch_rule(
const auto LU_num_batch_dims = rankWithoutBatchDim(LU_, LU_bdim) - LU_min_rank;
const auto pivots_num_batch_dims = rankWithoutBatchDim(pivots_, pivots_bdim) - pivots_min_rank;
const auto B_num_batch_dims = rankWithoutBatchDim(B_, B_bdim) - B_min_rank;
const auto max_num_batch_dims = std::max(std::max(LU_num_batch_dims, pivots_num_batch_dims), B_num_batch_dims);
const auto max_num_batch_dims = std::max({LU_num_batch_dims, pivots_num_batch_dims, B_num_batch_dims});
LU_ = maybePadToLogicalRank(LU_, LU_bdim, max_num_batch_dims + LU_min_rank);
pivots_ = maybePadToLogicalRank(pivots_, pivots_bdim, max_num_batch_dims + pivots_min_rank);
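
Not part of the diff: nested std::max calls work, but the initializer-list overload used in the hunk above expresses a three-way maximum directly. Illustrative only:

    #include <algorithm>
    #include <cstdint>

    int64_t max_of_three(int64_t a, int64_t b, int64_t c) {
      // std::max(std::initializer_list<T>) returns the largest element,
      // replacing std::max(std::max(a, b), c).
      return std::max({a, b, c});
    }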

View File

@ -897,11 +897,11 @@ Tensor& div_(Tensor& self, const Scalar& other) {
}
Tensor div(const Tensor& self, const Scalar& other, std::optional<std::string_view> rounding_mode) {
return self.div(wrapped_scalar_tensor(other), std::move(rounding_mode)); // redispatch!
return self.div(wrapped_scalar_tensor(other), rounding_mode); // redispatch!
}
Tensor& div_(Tensor& self, const Scalar& other, std::optional<std::string_view> rounding_mode) {
return self.div_(wrapped_scalar_tensor(other), std::move(rounding_mode)); // redispatch!
return self.div_(wrapped_scalar_tensor(other), rounding_mode); // redispatch!
}
// divide, alias for div
@ -926,23 +926,23 @@ Tensor& divide_(Tensor& self, const Scalar& other) {
}
Tensor& divide_out(const Tensor& self, const Tensor& other, std::optional<std::string_view> rounding_mode, Tensor& result) {
return at::div_out(result, self, other, std::move(rounding_mode));
return at::div_out(result, self, other, rounding_mode);
}
Tensor divide(const Tensor& self, const Tensor& other, std::optional<std::string_view> rounding_mode) {
return self.div(other, std::move(rounding_mode));
return self.div(other, rounding_mode);
}
Tensor& divide_(Tensor& self, const Tensor& other, std::optional<std::string_view> rounding_mode) {
return self.div_(other, std::move(rounding_mode));
return self.div_(other, rounding_mode);
}
Tensor divide(const Tensor& self, const Scalar& other, std::optional<std::string_view> rounding_mode) {
return self.div(other, std::move(rounding_mode));
return self.div(other, rounding_mode);
}
Tensor& divide_(Tensor& self, const Scalar& other, std::optional<std::string_view> rounding_mode) {
return self.div_(other, std::move(rounding_mode));
return self.div_(other, rounding_mode);
}
// true_divide, an alias for div

View File

@ -150,7 +150,7 @@ void histogramdd_prepare_out(const Tensor& input, const std::vector<int64_t>& bi
void histogramdd_prepare_out(const Tensor& input, TensorList bins,
const Tensor& hist, const TensorList& bin_edges) {
std::vector<int64_t> bin_ct(bins.size());
std::transform(bins.begin(), bins.end(), bin_ct.begin(), [](Tensor t) { return t.numel() - 1; });
std::transform(bins.begin(), bins.end(), bin_ct.begin(), [](const Tensor& t) { return t.numel() - 1; });
histogramdd_prepare_out(input, bin_ct, hist, bin_edges);
}

View File

@ -360,7 +360,7 @@ Tensor einsum(std::string_view equation, TensorList operands, at::OptionalIntArr
// to compute the number of dimensions covered by ellipsis.
for(const auto i : c10::irange(num_ops)) {
const auto& operand = operands[i];
const auto labels = op_labels[i];
const auto& labels = op_labels[i];
const auto ndims = operand.dim();
int64_t nlabels = static_cast<int64_t>(labels.size());
bool has_ellipsis = false;

View File

@ -237,7 +237,7 @@ TORCH_META_FUNC(linalg_vector_norm)(const Tensor& self, const Scalar& scalar_ord
at::detail::check_linalg_norm_dtype(opt_dtype, self.scalar_type(), "linalg.vector_norm");
auto mask = at::native::make_dim_mask(dim, self.dim());
auto shape = at::native::shape_from_dim_mask(self, std::move(mask), keepdim);
auto shape = at::native::shape_from_dim_mask(self, mask, keepdim);
auto options = self.options()
.dtype(toRealValueType(opt_dtype.value_or(self.scalar_type())));
@ -641,7 +641,7 @@ namespace {
Tensor linalg_matrix_power_impl(
const Tensor& self,
int64_t n,
std::optional<Tensor> _out) {
const std::optional<Tensor>& _out) {
NoTF32Guard disable_tf32;
auto out = _out.value_or(Tensor());
@ -1019,7 +1019,7 @@ Tensor multi_dot_impl(TensorList _tensors, std::optional<Tensor> _out) {
Tensor result;
if (_out.has_value()) {
auto out = *_out;
const auto& out = *_out;
TORCH_CHECK(
dtype == out.dtype(),
"multi_dot(): expected out tensor to have dtype ",

View File

@ -493,7 +493,7 @@ Tensor get_clamped_target_length(
// the gradient is implemented for _cudnn_ctc_loss (just in derivatives.yaml) and _ctc_loss and this function has automatic gradients
// it also handles the reduction if desired
template <typename LengthsType>
Tensor ctc_loss_impl(const Tensor& log_probs_, const Tensor& targets, LengthsType input_lengths, LengthsType target_lengths, int64_t BLANK, int64_t reduction, bool zero_infinity) {
Tensor ctc_loss_impl(const Tensor& log_probs_, const Tensor& targets, const LengthsType& input_lengths, const LengthsType& target_lengths, int64_t BLANK, int64_t reduction, bool zero_infinity) {
auto is_batched = log_probs_.dim() == 3;
Tensor log_probs = is_batched ? log_probs_ : log_probs_.unsqueeze(1);
bool use_cudnn =

View File

@ -599,7 +599,7 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, int64_t> _batch_norm_impl_index(
check_dims_match_num_input_features("weight", num_features, weight.sym_numel());
}
if (bias.defined()) {
check_dims_match_num_input_features("bias", std::move(num_features), bias.sym_numel());
check_dims_match_num_input_features("bias", num_features, bias.sym_numel());
}
BatchNormBackend backend = _select_batch_norm_backend(input, weight, bias, running_mean, running_var, training, eps);
@ -923,7 +923,7 @@ std::tuple<Tensor, Tensor, Tensor> _batch_norm_legit_no_stats_cpu(
std::tuple<Tensor, Tensor, Tensor> _batch_norm_legit_no_training(
const Tensor& self, const std::optional<Tensor>& weight_opt, const std::optional<Tensor>& bias_opt,
const Tensor& running_mean, const Tensor& running_var, double momentum, double eps) {
return at::_native_batch_norm_legit(self, weight_opt, bias_opt, const_cast<Tensor&>(running_mean), const_cast<Tensor&>(running_var), /*train=*/false, momentum, eps);
return at::_native_batch_norm_legit(self, weight_opt, bias_opt, const_cast<Tensor&>(running_mean), const_cast<Tensor&>(running_var), /*training=*/false, momentum, eps);
}

View File

@ -1533,7 +1533,7 @@ std::tuple<Tensor, Tensor> lstm_cell(
check_rnn_cell_forward_input(input, w_ih.sym_size(1));
auto hidden_size = w_hh.sym_size(1);
check_rnn_cell_forward_hidden(input, hx[0], hidden_size, 0);
check_rnn_cell_forward_hidden(input, hx[1], std::move(hidden_size), 1);
check_rnn_cell_forward_hidden(input, hx[1], hidden_size, 1);
static at::Tensor undefined;
return LSTMCell<CellParams>{}(input, std::make_tuple(hx[0], hx[1]), CellParams{w_ih, w_hh, b_ih, b_hh, undefined});
}
@ -1612,13 +1612,13 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor> _thnn_differentiable_gru_cell
h_g = h_g + hidden_bias;
}
auto chunked_input_gates = in_g.unsafe_chunk(3, 1);
Tensor ir = chunked_input_gates[0];
Tensor ii = chunked_input_gates[1];
Tensor in = chunked_input_gates[2];
const Tensor& ir = chunked_input_gates[0];
const Tensor& ii = chunked_input_gates[1];
const Tensor& in = chunked_input_gates[2];
auto chunked_hidden_gates = h_g.unsafe_chunk(3, 1);
Tensor hr = chunked_hidden_gates[0];
Tensor hi = chunked_hidden_gates[1];
Tensor hn = chunked_hidden_gates[2];
const Tensor& hr = chunked_hidden_gates[0];
const Tensor& hi = chunked_hidden_gates[1];
const Tensor& hn = chunked_hidden_gates[2];
Tensor rg = (ir + hr).sigmoid();
Tensor ig = (ii + hi).sigmoid();
Tensor grad_hx = grad_hy * ig;

View File

@ -409,17 +409,17 @@ static inline Tensor& unary_op_impl_out(Tensor& result, const Tensor& self, Stub
}
template <typename Stub, typename ...Args>
static inline Tensor& unary_op_impl_float_out(Tensor& result, const Tensor& self, Stub& stub, Args... args) {
static inline Tensor& unary_op_impl_float_out(Tensor& result, const Tensor& self, Stub& stub, Args&&... args) {
auto iter = TensorIterator::unary_float_op(result, self);
stub(iter.device_type(), iter, args...);
stub(iter.device_type(), iter, std::forward<Args>(args)...);
return result;
}
template <typename Stub, typename ...Args>
static inline Tensor unary_op_impl_float(const Tensor& self, Stub& stub, Args... args) {
static inline Tensor unary_op_impl_float(const Tensor& self, Stub& stub, Args&&... args) {
Tensor result;
auto iter = TensorIterator::unary_float_op(result, self);
stub(iter.device_type(), iter, args...);
stub(iter.device_type(), iter, std::forward<Args>(args)...);
return iter.output();
}
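
Not part of the diff: the Args... to Args&&... change above turns the helpers' trailing parameters into forwarding references, so arguments reach the stub without extra copies. A standalone, hypothetical sketch of the same pattern (the real DispatchStub machinery is not shown):

    #include <utility>

    struct FakeStub {
      template <typename... Ts>
      void operator()(Ts&&... /*ts*/) const {}  // stand-in for a dispatch stub
    };

    template <typename Stub, typename... Args>
    void call_stub(Stub& stub, Args&&... args) {
      // std::forward keeps lvalues as lvalues and rvalues as rvalues,
      // so movable temporaries are moved rather than copied downstream.
      stub(std::forward<Args>(args)...);
    }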

View File

@ -323,7 +323,7 @@ std::tuple<Tensor, Tensor, Tensor> unique_consecutive_cpu_template(
template<class ForwardIt>
ForwardIt _unique_dim_cpu_impl(ForwardIt first, ForwardIt last,
std::vector<int64_t>& indices, Tensor inverse_indices_vec, Tensor counts) {
std::vector<int64_t>& indices, const Tensor& inverse_indices_vec, const Tensor& counts) {
if (first == last) {
return last;
}

View File

@ -24,7 +24,7 @@ constexpr int64_t num_output_channels_index [[maybe_unused]] = 10;
constexpr int64_t num_input_channels_index [[maybe_unused]] = 11;
template <typename TENSOR_DTYPE, typename VEC_DTYPE>
std::vector<VEC_DTYPE> unwrap_vector(at::Tensor tensor) {
std::vector<VEC_DTYPE> unwrap_vector(const at::Tensor& tensor) {
std::vector<VEC_DTYPE> vec(tensor.numel());
TENSOR_DTYPE* tensor_data_ptr = tensor.data_ptr<TENSOR_DTYPE>();
std::copy(tensor_data_ptr, tensor_data_ptr + tensor.numel(), vec.data());
@ -39,7 +39,7 @@ std::vector<VEC_DTYPE> unwrap_vector(at::Tensor tensor) {
*/
void unpack_bcsr(
int8_t* dst,
ao::sparse::BCSR bcsr,
const ao::sparse::BCSR& bcsr,
const int64_t R,
const int64_t C,
const int64_t RB,

View File

@ -35,7 +35,7 @@ C10_ALWAYS_INLINE void _check_rms_norm_inputs_symint(
std::stringstream ss;
ss << "Given normalized_shape=" << normalized_shape
<< ", expected input with shape [*";
for (auto size : normalized_shape) {
for (const auto& size : normalized_shape) {
ss << ", " << size;
}
ss << "], but got input of size" << input_shape;

View File

@ -77,7 +77,7 @@ static Tensor NestedTensor_elementwise_Tensor(
const Tensor& other,
const std::string& op_name,
bool supports_striding,
Func f) {
const Func& f) {
Tensor self_contiguous = self;
Tensor other_contiguous = other;
// self is a scalar
@ -238,7 +238,7 @@ static Tensor& NestedTensor_elementwise__Tensor(
Tensor& self,
const Tensor& other,
const std::string& op_name,
Func f) {
const Func& f) {
// self is a scalar
if (!self.is_nested() && self.dim() == 0 && self.numel() == 1) {
auto other_impl = get_nested_tensor_impl(other);

View File

@ -149,7 +149,7 @@ Tensor MakeStridedQTensorCPU(
const IntArrayRef& sizes,
const IntArrayRef& strides,
const TensorOptions& options,
QuantizerPtr quantizer) {
const QuantizerPtr& quantizer) {
AT_ASSERT(options.device().is_cpu());
at::native::check_size_nonnegative(sizes);
auto* allocator = at::getCPUAllocator();

View File

@ -37,7 +37,7 @@ struct TORCH_API PackedLinearWeight : public LinearPackedParamsBase {
col_offsets(std::move(col_offsets)),
w_scale(std::move(w_scale)),
w_zp(std::move(w_zp)),
q_scheme(std::move(q_scheme)) {}
q_scheme(q_scheme) {}
std::unique_ptr<fbgemm::PackBMatrix<int8_t>> w;
std::optional<at::Tensor> bias_;
std::vector<int32_t> col_offsets;
@ -316,7 +316,7 @@ Tensor MakeStridedQTensorCPU(
const IntArrayRef& sizes,
const IntArrayRef& strides,
const TensorOptions& options,
QuantizerPtr quantizer);
const QuantizerPtr& quantizer);
Tensor MakeEmptyAffineQuantizedChannelsLast3dTensor(
int64_t N,

View File

@ -7,7 +7,7 @@ QTensorImpl::QTensorImpl(
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer)
: TensorImpl(std::move(storage), std::move(key_set), data_type),
: TensorImpl(std::move(storage), key_set, data_type),
quantizer_(std::move(quantizer)) {}
QTensorImpl::QTensorImpl(
@ -16,7 +16,7 @@ QTensorImpl::QTensorImpl(
DispatchKeySet key_set,
const caffe2::TypeMeta data_type,
QuantizerPtr quantizer)
: TensorImpl(type, std::move(storage), std::move(key_set), data_type),
: TensorImpl(type, std::move(storage), key_set, data_type),
quantizer_(std::move(quantizer)) {}
const char* QTensorImpl::tensorimpl_type_name() const {
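
Not part of the diff: the constructor changes above keep std::move for members with a non-trivial move (Storage, QuantizerPtr) and drop it for trivially copyable values such as DispatchKeySet, where a "move" is just a copy. A hypothetical sketch:

    #include <cstdint>
    #include <memory>
    #include <utility>

    enum class Scheme : uint8_t { PerTensor, PerChannel };

    struct Impl {
      Impl(std::shared_ptr<int> payload, uint64_t key_bits, Scheme scheme)
          : payload_(std::move(payload)),  // worth moving: shared_ptr has a real move
            key_bits_(key_bits),           // trivially copyable: std::move would just copy
            scheme_(scheme) {}             // same for enums

      std::shared_ptr<int> payload_;
      uint64_t key_bits_;
      Scheme scheme_;
    };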

View File

@ -4,6 +4,8 @@
#include <c10/core/TensorImpl.h>
#include <c10/util/Exception.h>
#include <utility>
namespace at {
/**
@ -36,7 +38,7 @@ struct TORCH_API QTensorImpl : public c10::TensorImpl {
}
void set_quantizer_(QuantizerPtr quantizer) {
quantizer_ = quantizer;
quantizer_ = std::move(quantizer);
}
/**

View File

@ -107,7 +107,7 @@ static int64_t get_sub_byte_tensor_size(IntArrayRef sizes, size_t dtype_itemsize
inline Tensor new_qtensor(
IntArrayRef sizes,
const TensorOptions& options,
QuantizerPtr quantizer) {
const QuantizerPtr& quantizer) {
auto memory_format = options.memory_format_opt().value_or(MemoryFormat::Contiguous);
auto device = options.device();
at::Allocator* allocator = nullptr;
@ -338,7 +338,7 @@ Tensor from_blob_quantized_per_tensor_affine(
const std::size_t datasize = size * itemsize;
DataPtr data_ptr = InefficientStdFunctionContext::makeDataPtr(
data, deleter, options.device());
data, std::move(deleter), options.device());
Storage storage{Storage::use_byte_size_t{}, datasize, std::move(data_ptr)};
@ -411,7 +411,7 @@ Tensor from_blob_quantized_per_channel_affine(
const std::size_t datasize = size * itemsize;
DataPtr data_ptr = InefficientStdFunctionContext::makeDataPtr(
data, deleter, options.device());
data, std::move(deleter), options.device());
Storage storage{Storage::use_byte_size_t{}, datasize, std::move(data_ptr)};

View File

@ -196,8 +196,8 @@ struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffine
Tensor zero_points,
int64_t axis)
: PerChannelAffineQuantizer(scalar_type,
scales,
zero_points,
std::move(scales),
std::move(zero_points),
axis) {}
QScheme qscheme() const override {
@ -246,7 +246,7 @@ TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type);
TORCH_API Tensor new_qtensor(
IntArrayRef sizes,
const TensorOptions& options,
QuantizerPtr quantizer);
const QuantizerPtr& quantizer);
TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer);

View File

@ -396,7 +396,8 @@ size_t PyTorchStreamReader::getRecordMultiReaders(
size_t perThreadSize = (n + nthread - 1) / nthread;
std::vector<size_t> readSizes(nthread, 0);
std::lock_guard<std::mutex> guard(reader_lock_);
for (size_t i = 0; i < nthread; i++) {
loaderThreads.reserve(nthread);
for (size_t i = 0; i < nthread; i++) {
loaderThreads.emplace_back([this,
name,
i,
@ -415,7 +416,7 @@ size_t PyTorchStreamReader::getRecordMultiReaders(
size =
read(recordOff + startPos, (char*)dst + startPos, threadReadSize);
} else {
auto reader = additionalReaders[i - 1];
const auto& reader = additionalReaders[i - 1];
size = reader->read(
recordOff + startPos, (char*)dst + startPos, threadReadSize);
}
@ -641,7 +642,7 @@ size_t PyTorchStreamReader::getRecordSize(const std::string& name) {
size_t PyTorchStreamReader::getRecordOffsetNoRead(
size_t cursor,
std::string filename,
const std::string& filename,
size_t size,
uint64_t alignment) {
std::string full_name = archive_name_plus_slash_ + filename;
@ -697,7 +698,7 @@ PyTorchStreamWriter::PyTorchStreamWriter(
}
PyTorchStreamWriter::PyTorchStreamWriter(
const std::function<size_t(const void*, size_t)> writer_func,
const std::function<size_t(const void*, size_t)>& writer_func,
bool compute_crc32,
uint64_t alignment)
: archive_name_("archive"),
@ -712,7 +713,7 @@ void PyTorchStreamWriter::setup(const string& file_name) {
memset(ar_.get(), 0, sizeof(mz_zip_archive));
archive_name_plus_slash_ = archive_name_ + "/"; // for writeRecord().
if (archive_name_.size() == 0) {
if (archive_name_.empty()) {
CAFFE_THROW("invalid file name: ", file_name);
}
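
Not part of the diff: calling reserve() with the known thread count before the emplace_back loop above (clang-tidy's performance-inefficient-vector-operation) avoids repeated reallocation while the vector grows. Illustrative sketch:

    #include <cstddef>
    #include <thread>
    #include <vector>

    void spawn_workers(std::vector<std::thread>& workers, size_t nthread) {
      workers.reserve(nthread);  // one allocation up front instead of several regrowths
      for (size_t i = 0; i < nthread; ++i) {
        workers.emplace_back([i] {
          (void)i;  // per-thread work for shard i would go here
        });
      }
    }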

View File

@ -180,7 +180,7 @@ class TORCH_API PyTorchStreamReader final {
size_t getRecordOffset(const std::string& name);
size_t getRecordOffsetNoRead(
size_t cursor,
std::string filename,
const std::string& filename,
size_t size,
uint64_t alignment);
bool hasRecord(const std::string& name);
@ -232,7 +232,7 @@ class TORCH_API PyTorchStreamWriter final {
bool compute_crc32 = true,
uint64_t alignment = 64);
explicit PyTorchStreamWriter(
const std::function<size_t(const void*, size_t)> writer_func,
const std::function<size_t(const void*, size_t)>& writer_func,
bool compute_crc32 = true,
uint64_t alignment = 64);

View File

@ -114,8 +114,8 @@ inline Tensor mse_loss(
}
std::vector<torch::Tensor> broadcast_tensors =
torch::broadcast_tensors({input, target});
auto expanded_input = broadcast_tensors[0];
auto expanded_target = broadcast_tensors[1];
const auto& expanded_input = broadcast_tensors[0];
const auto& expanded_target = broadcast_tensors[1];
return torch::mse_loss(
expanded_input, expanded_target, enumtype::reduction_get_enum(reduction));
}

View File

@ -7,6 +7,7 @@
#include <torch/csrc/utils/pybind.h>
#include <string>
#include <utility>
namespace py = pybind11;
@ -30,9 +31,7 @@ namespace pybind11::detail {
} \
}
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ITEM_TYPE_CASTER(torch::Tensor, Tensor);
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
ITEM_TYPE_CASTER(std::shared_ptr<torch::nn::Module>, Module);
} // namespace pybind11::detail
@ -42,7 +41,7 @@ template <typename T>
void bind_ordered_dict(py::module module, const char* dict_name) {
using ODict = OrderedDict<std::string, T>;
// clang-format off
py::class_<ODict>(module, dict_name)
py::class_<ODict>(std::move(module), dict_name)
.def("items", &ODict::items)
.def("keys", &ODict::keys)
.def("values", &ODict::values)

View File

@ -594,17 +594,20 @@ Tensor masked_fill_backward(const Tensor& grad, const Tensor& mask) {
}
template <typename T>
Tensor mul_tensor_backward(const Tensor& grad, T other, ScalarType self_st) {
Tensor mul_tensor_backward(
const Tensor& grad,
const T& other,
ScalarType self_st) {
auto out = grad * other.conj();
return handle_r_to_c(self_st, std::move(out));
}
template Tensor mul_tensor_backward(const Tensor&, Tensor, ScalarType);
template Tensor mul_tensor_backward(const Tensor&, Scalar, ScalarType);
template Tensor mul_tensor_backward(const Tensor&, const Tensor&, ScalarType);
template Tensor mul_tensor_backward(const Tensor&, const Scalar&, ScalarType);
template <typename T>
Tensor div_tensor_self_backward(
const Tensor& grad,
T other,
const T& other,
ScalarType self_st,
const std::optional<std::string_view>& rounding_mode) {
if (rounding_mode.has_value()) {
@ -616,12 +619,12 @@ Tensor div_tensor_self_backward(
}
template Tensor div_tensor_self_backward(
const Tensor&,
Tensor,
const Tensor&,
ScalarType,
const std::optional<std::string_view>&);
template Tensor div_tensor_self_backward(
const Tensor&,
Scalar,
const Scalar&,
ScalarType,
const std::optional<std::string_view>&);
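
Not part of the diff: when a function template's parameter changes from T to const T&, its explicit instantiations (as in the FunctionsManual.cpp hunk above) have to be updated in step, since each instantiation must name a signature the template can actually produce. A small hypothetical example:

    #include <string>

    template <typename T>
    std::string describe(const T& value) {
      return "object of " + std::to_string(sizeof(value)) + " bytes";
    }

    // Explicit instantiation definitions spell out the new parameter type;
    // the old by-value forms (e.g. `template std::string describe(int);`)
    // would no longer match any specialization and fail to compile.
    template std::string describe<int>(const int&);
    template std::string describe<double>(const double&);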

View File

@ -137,11 +137,14 @@ at::Tensor pow_backward_exponent(
const at::Tensor& result);
at::Tensor angle_backward(const at::Tensor& grad, const at::Tensor& self);
template <typename T>
at::Tensor mul_tensor_backward(const Tensor& grad, T other, ScalarType self_st);
at::Tensor mul_tensor_backward(
const Tensor& grad,
const T& other,
ScalarType self_st);
template <typename T>
at::Tensor div_tensor_self_backward(
const Tensor& grad,
T other,
const T& other,
ScalarType self_st,
const std::optional<std::string_view>& rounding_mode = std::nullopt);
at::Tensor div_tensor_other_backward(

View File

@ -1366,7 +1366,7 @@ static PyObject* pop_torch_dispatch_stack(
"Attempted to unset ",
c10::impl::to_string(mode_key.value()),
", but there wasn't one active.");
auto mode = maybe_mode.value();
const auto& mode = maybe_mode.value();
r = mode->ptr(getPyInterpreter());
} else {
auto mode = c10::impl::TorchDispatchModeTLS::pop_stack();

View File

@ -1205,7 +1205,7 @@ class AsyncReduceWork : public ProcessGroupGloo::AsyncWork {
protected:
template <typename T>
void getFunction(gloo::ReduceOptions::Func& fn, const ReduceOp op) {
void getFunction(gloo::ReduceOptions::Func& fn, const ReduceOp& op) {
fn = toFunction<T>(op);
}

View File

@ -163,8 +163,8 @@ struct CollectiveFingerPrint {
backend->allgather(output_tensors, tensors_to_verify)->wait();
// Verify equivalence
for (const auto i : c10::irange(output_tensors.size())) {
const std::vector<at::Tensor> gathered_tensors = output_tensors[i];
const at::Tensor reference_tensor = tensors_to_verify[i];
const std::vector<at::Tensor>& gathered_tensors = output_tensors[i];
const at::Tensor& reference_tensor = tensors_to_verify[i];
for (const auto rank : c10::irange(gathered_tensors.size())) {
const auto& rank_tensor = gathered_tensors[rank];
if (!rank_tensor.equal(reference_tensor)) {

View File

@ -1280,11 +1280,11 @@ void aoti_torch_print_tensor_handle(AtenTensorHandle self, const char* msg) {
// Print dtypes and for float types, print exact precision
auto scalarType = t->scalar_type();
if (scalarType == at::ScalarType::Float) {
std::cout << "Dtype: float32" << std::endl;
std::cout << "Dtype: float32" << '\n';
} else if (scalarType == at::ScalarType::Half) {
std::cout << "Dtype: float16" << std::endl;
std::cout << "Dtype: float16" << '\n';
} else if (scalarType == at::ScalarType::BFloat16) {
std::cout << "Dtype: bfloat16" << std::endl;
std::cout << "Dtype: bfloat16" << '\n';
} else {
std::cout << "Dtype: " << t->dtype() << '\n';
}

View File

@ -838,7 +838,7 @@ static void onFunctionExit(const RecordFunction& fn, ObserverContext* ctx_ptr) {
if (!checkFunctionOutputsForLogging(fn)) {
return;
}
auto outputs = fn.outputs();
const auto& outputs = fn.outputs();
auto num_outputs = fn.num_outputs();
// need to account for Stack mode where the outputs are at the end.
size_t output_start = outputs.size() - num_outputs;

View File

@ -57,7 +57,7 @@ static void start_manager() {
handle.append(buffer.data(), bytes_read);
}
SYSCHECK_ERR_RETURN_NEG1(close(pipe_ends[0]));
if (handle.length() == 0) {
if (handle.empty()) {
std::string msg("no response from torch_shm_manager at \"");
msg += manager_executable_path;
msg += "\"";