Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 12:54:11 +08:00
Fix readability checks in TIDY and apply them (#164475)
Fixes #ISSUE_NUMBER

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164475
Approved by: https://github.com/albanD, https://github.com/Skylion007

Co-authored-by: Aaron Gokaslan <aaronGokaslan@gmail.com>
commit 115af42e9d
parent 5f775bdfb7
committed by: PyTorch MergeBot
@@ -59,9 +59,9 @@ performance-*,
 -performance-enum-size,
 readability-container-size-empty,
 readability-delete-null-pointer,
-readability-duplicate-include
+readability-duplicate-include,
 readability-misplaced-array-index,
-readability-redundant*
+readability-redundant*,
 readability-simplify-subscript-expr,
 readability-string-compare,
 -readability-redundant-access-specifiers,
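The two missing trailing commas are what the title appears to mean by "fix readability checks": clang-tidy parses Checks as a comma-separated glob list, so without the commas adjacent entries were fused into one nonexistent check name and those readability checks silently never ran. For illustration only (not part of this diff; file and identifiers are made up), a small translation unit that the re-enabled checks would now flag, with the relevant check named in each comment:

// example.cpp - illustrative only; each comment names the clang-tidy check that would fire.
#include <string>
#include <vector>

bool demo(const std::vector<int>& v, const std::string& s) {
  // readability-container-size-empty: prefer v.empty() over comparing size() with 0.
  if (v.size() == 0) {
    return false;
  }
  // readability-redundant-string-init (covered by the readability-redundant* glob): "" is already the default.
  std::string name = "";
  // readability-string-compare: prefer s == "done" over testing compare() against 0.
  if (s.compare("done") == 0) {
    name = s;
  }
  return !name.empty();
}

The rest of the diff is essentially these fix-its applied across the codebase.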
@@ -1375,7 +1375,7 @@ _scaled_mm_out_cuda(const Tensor& mat1, const Tensor& mat2,
   if (scaling_choice_a == ScalingType::RowWise && scaling_choice_b == ScalingType::RowWise
       && ((dprops->major < 9 || CUBLAS_VERSION < 120900 || cublasLtGetVersion() < 120900)
         // cuBLAS only supports tiled 1D factor layout for 1D block scaling, no 2D block scales
-        || (dprops->major >= 10 && (scale_a.sizes().size() || scale_b.sizes().size())))) {
+        || (dprops->major >= 10 && (!scale_a.sizes().empty() || !scale_b.sizes().empty())))) {
     TORCH_CHECK(out.dtype() == kBFloat16, "Only bf16 high precision output types are supported for row-wise scaling.");
     at::cuda::detail::f8f8bf16_rowwise(
         mat1,
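Here `scale_a.sizes().size()` was being used as a boolean meaning "the scale tensor has at least one dimension"; readability-container-size-empty rewrites that to `!sizes().empty()`, which states the intent directly. A minimal sketch of the equivalence, with a plain std::vector standing in for the array-ref returned by sizes() (which also exposes empty()):

#include <cassert>
#include <vector>

// Stand-in for a tensor's shape; the real return type of sizes() has the same size()/empty() surface.
using Shape = std::vector<long>;

bool has_dims_old(const Shape& sizes) { return sizes.size(); }    // implicit size_t -> bool, flagged by the check
bool has_dims_new(const Shape& sizes) { return !sizes.empty(); }  // the suggested rewrite

int main() {
  assert(has_dims_old({}) == has_dims_new({}));
  assert(has_dims_old({2, 3}) == has_dims_new({2, 3}));
}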
@@ -8,7 +8,6 @@
 #include <ATen/NativeFunctions.h>
 #include <ATen/Dispatch.h>
 #include <ATen/DynamicLibrary.h>
-#include <ATen/NativeFunctions.h>
 #include <ATen/native/cuda/MiscUtils.h>
 #include <ATen/native/Resize.h>
 #include <ATen/native/LinearAlgebra.h>
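This one is readability-duplicate-include: the same header is pulled in twice in one file, and the later directive is pure clutter (an include guard only turns it into a no-op, it does not remove it). A tiny illustration, not taken from the PR:

// flagged: <vector> appears twice in the same translation unit.
#include <vector>
#include <string>
#include <vector>  // readability-duplicate-include suggests deleting this line

std::vector<std::string> make() { return {"a", "b"}; }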
@@ -1041,8 +1041,8 @@ std::string generate_code(
   // and `extra_args` for computation call if
   // extra arguments to capture runtime state are passed.
   // (look at polygamma for example).
-  std::string extra_params = "";
-  std::string extra_args = "";
+  std::string extra_params;
+  std::string extra_args;
   for (size_t i = 0; i < extra_args_typenames.size(); i++) {
     auto type = std::string(extra_args_typenames[i]);
     auto name = "extra_arg_" + std::to_string(i);
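std::string default-constructs to the empty string, so initializing it with "" adds nothing; readability-redundant-string-init strips the initializer. A minimal sketch of the equivalence:

#include <cassert>
#include <string>

int main() {
  std::string before = "";  // flagged: redundant initialization with the default value
  std::string after;        // equivalent, and what the check rewrites it to
  assert(before == after && after.empty());
}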
@@ -1352,7 +1352,7 @@ std::string generate_reduction_code(
     int vec_size,
     int max_threads_codegen) {
   TORCH_INTERNAL_ASSERT(desc.nInputs == 1);
-  TORCH_INTERNAL_ASSERT(desc.extra_args_types.size() == 0);
+  TORCH_INTERNAL_ASSERT(desc.extra_args_types.empty());

   return generate_reduction_code(
       desc.nOutputs,
@@ -1451,7 +1451,7 @@ std::optional<std::string> get_cache_dir() {
   std::string cache_dir;
   char* ptkcp = std::getenv("PYTORCH_KERNEL_CACHE_PATH");
   // Create kernel_cache_dir if needed as we do not want to create the base directory passed by the user
-  std::string kernels_cache_dir = "";
+  std::string kernels_cache_dir;
   if (ptkcp != nullptr) {
     cache_dir = std::string(ptkcp);
   } else {
@@ -14,7 +14,6 @@
 #include <ATen/native/LinearAlgebraUtils.h>
 #include <ATen/native/cuda/MiscUtils.h>
 #include <ATen/native/LinearAlgebra.h>
-#include <ATen/native/BatchLinearAlgebra.h>
 #include <ATen/native/cuda/linalg/BatchLinearAlgebraLib.h>
 #include <ATen/native/cuda/linalg/MagmaUtils.h>
 #include <ATen/native/cpu/zmath.h>
@@ -813,7 +813,7 @@ static void _save_variables(
     const std::vector<std::optional<at::Tensor>>& tensors_to_save,
     const std::shared_ptr<PyNode>& cdata_ptr,
     THPFunction* self) {
-  if (tensors_to_save.size() == 0)
+  if (tensors_to_save.empty())
     return;
   size_t num_saved = tensors_to_save.size();
   self->saved_variables.clear();
@@ -35,7 +35,7 @@ at::Tensor allocate_all_gather_output(
     int64_t group_size) {
   TORCH_CHECK(input.is_contiguous());
   auto output_size = input.sizes().vec();
-  if (output_size.size() == 0) {
+  if (output_size.empty()) {
     output_size.push_back(group_size);
   } else {
     output_size[0] *= group_size;
@@ -196,7 +196,7 @@ std::shared_ptr<::gloo::transport::Device> makeGlooDevice(
   static auto transportName = c10::utils::get_env("GLOO_DEVICE_TRANSPORT");
   if (transportName.has_value()) {
     return GlooDeviceRegistry()->Create(
-        transportName.value().c_str(), interfaceName, hostName, lazyInit);
+        transportName.value(), interfaceName, hostName, lazyInit);
   }

 #ifdef __linux__
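The registry key parameter evidently accepts a std::string directly (otherwise the check would not fire), so round-tripping through c_str() just forces an extra std::string to be constructed from the C string; readability-redundant-string-cstr drops it. A sketch with a hypothetical callee standing in for GlooDeviceRegistry()->Create():

#include <iostream>
#include <string>

// Hypothetical stand-in: the key is taken by const reference to std::string.
void create(const std::string& key) { std::cout << key << '\n'; }

int main() {
  std::string transport = "tcp";
  create(transport.c_str());  // flagged: builds a temporary std::string from the C string again
  create(transport);          // suggested: pass the existing std::string directly
}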
@@ -165,7 +165,7 @@ c10::intrusive_ptr<ProcessGroup> ProcessGroup::splitGroup(
     const std::optional<std::string>& name,
     const std::optional<std::string>& desc) {
   TORCH_CHECK(
-      ranks.size() > 0,
+      !ranks.empty(),
       "Split ranks cannot be empty. Please provide a non-empty list of ranks to split the group.");
   TORCH_CHECK(
       ranks.size() <= static_cast<size_t>(size_),
@@ -559,7 +559,7 @@ c10::intrusive_ptr<ProcessGroupGloo::Options> ProcessGroupGloo::Options::
   // Use interfaces listed in "GLOO_SOCKET_IFNAME", if set.
   auto ifnameEnv = c10::utils::get_env("GLOO_SOCKET_IFNAME");
   if (ifnameEnv && ifnameEnv->size() > 1) {
-    for (const auto& iface : ::c10d::split(',', ifnameEnv->c_str())) {
+    for (const auto& iface : ::c10d::split(',', *ifnameEnv)) {
       options->devices.push_back(
           ::c10d::ProcessGroupGloo::createDeviceForInterface(iface, lazyInit));
     }
@@ -1,6 +1,7 @@
 #ifdef USE_C10D_GLOO
 #include <torch/csrc/distributed/c10d/ProcessGroupGloo.hpp>
 #include <torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp>
+#include <utility>

 #include <gloo/cuda_allreduce_ring_chunked.h>

@@ -24,7 +25,7 @@ class AsyncAllreduceCUDADeviceWork : public ProcessGroupGloo::AsyncWork {
             "gloo:all_reduce",
             inputs),
         inputs_(inputs),
-        reduceOp_(reduceOp) {}
+        reduceOp_(std::move(reduceOp)) {}

   template <typename T>
   void createAlgorithm(std::unique_ptr<gloo::Algorithm>& algo) {
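Presumably reduceOp is taken by value in this constructor, so moving it into the member saves one copy; the companion change in the same file adds #include <utility>, which is where std::move is declared. A minimal sketch of the pattern (names are illustrative, not the actual c10d types):

#include <utility>  // std::move
#include <vector>

struct ReduceOp {
  std::vector<int> state;  // stand-in for whatever makes copying non-trivial
};

class Work {
 public:
  // By-value parameter + std::move: one copy (or move) at the call site, one move into the member.
  explicit Work(ReduceOp reduceOp) : reduceOp_(std::move(reduceOp)) {}

 private:
  ReduceOp reduceOp_;
};

int main() {
  Work w{ReduceOp{{1, 2, 3}}};  // the temporary is moved all the way into the member
  (void)w;
}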
@@ -1089,8 +1089,8 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   bool useNonblocking();

  protected:
-  int globalRankStart_;
-  int globalRankStride_;
+  int globalRankStart_{};
+  int globalRankStride_{};

  private:
   bool eagerInit_{false};
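The empty braces are in-class default initializers: `int x{};` value-initializes the member to 0, so every constructor of the class starts from a defined value instead of an indeterminate int, matching the eagerInit_{false} member just below. Presumably this satisfies a member-initialization tidy warning. A minimal sketch of the difference:

#include <iostream>

struct Before {
  int start_;   // indeterminate unless every constructor remembers to set it
};

struct After {
  int start_{};  // in-class initializer: value-initialized to 0 by default
};

int main() {
  After a;
  std::cout << a.start_ << '\n';  // always prints 0
  // Reading start_ from a default-constructed Before would be undefined behavior.
}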
@@ -1380,7 +1380,7 @@ class TORCH_API ProcessGroupNCCL : public Backend {
   std::shared_ptr<NCCLComm> coalescedComm_ = nullptr;

   // Whether the coalesced calls are sync or async.
-  bool coalescedAsync_;
+  bool coalescedAsync_{};

   // keeps track of input and output tensors when coalescing is in flight. Will
   // hand over these tensors to WorkNCCL's stash when coalescing is ended.
@@ -163,8 +163,8 @@ struct CollectiveFingerPrint {
     backend->allgather(output_tensors, tensors_to_verify)->wait();
     // Verify equivalence
     for (const auto i : c10::irange(output_tensors.size())) {
-      const std::vector<at::Tensor> gathered_tensors = output_tensors[i];
-      const at::Tensor reference_tensor = tensors_to_verify[i];
+      const std::vector<at::Tensor>& gathered_tensors = output_tensors[i];
+      const at::Tensor& reference_tensor = tensors_to_verify[i];
       for (const auto rank : c10::irange(gathered_tensors.size())) {
         const auto& rank_tensor = gathered_tensors[rank];
         if (!rank_tensor.equal(reference_tensor)) {
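output_tensors[i] is a whole std::vector<at::Tensor>; binding it to a const reference instead of a const by-value local avoids copying the vector (and, presumably, the per-tensor refcount traffic) on every iteration, while the read-only usage stays identical. A sketch with plain strings in place of tensors:

#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::vector<std::string>> output = {{"a", "b"}, {"c", "d"}};

  for (size_t i = 0; i < output.size(); ++i) {
    // const std::vector<std::string> copy = output[i];  // would copy the inner vector each iteration
    const std::vector<std::string>& row = output[i];     // reference: no copy, same read-only access
    for (const auto& item : row) {
      std::cout << item << ' ';
    }
  }
  std::cout << '\n';
}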
@@ -263,10 +263,12 @@ constexpr static int kNumUvThreads = 16;

 std::unique_ptr<ChannelRegistration> makeMultiplexedUvChannel() {
   std::vector<std::shared_ptr<tensorpipe::transport::Context>> contexts;
+  contexts.reserve(kNumUvThreads);
   std::vector<std::shared_ptr<tensorpipe::transport::Listener>> listeners;
+  listeners.reserve(kNumUvThreads);
   for ([[maybe_unused]] const auto laneIdx : c10::irange(kNumUvThreads)) {
     auto context = tensorpipe::transport::uv::create();
-    std::string address = TensorPipeAgent::guessAddress();
+    const std::string& address = TensorPipeAgent::guessAddress();
     contexts.push_back(std::move(context));
     listeners.push_back(contexts.back()->listen(address));
   }
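Since the loop pushes exactly kNumUvThreads elements into each vector, calling reserve() up front does all the allocation once instead of letting the vectors reallocate as they grow; the address change additionally binds a const reference to the returned string rather than copying it each iteration. A minimal sketch of the reserve pattern (identifiers are illustrative):

#include <string>
#include <vector>

constexpr int kLanes = 16;  // plays the role of kNumUvThreads

int main() {
  std::vector<std::string> listeners;
  listeners.reserve(kLanes);  // one allocation up front instead of repeated growth steps
  for (int i = 0; i < kLanes; ++i) {
    listeners.push_back("listener-" + std::to_string(i));
  }
  return listeners.size() == static_cast<size_t>(kLanes) ? 0 : 1;
}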