[Clang-tidy header][19/N] Enable clang-tidy on torch/csrc/autograd/profiler_legacy.* (#120552)

This PR enables clang-tidy on torch/csrc/autograd/profiler_legacy.* and cleans up some of the clang-tidy path rules (include/exclude patterns).
Pull Request resolved: https://github.com/pytorch/pytorch/pull/120552
Approved by: https://github.com/Skylion007
Author: cyy
Date: 2024-02-25 03:29:38 +00:00
Committed by: PyTorch MergeBot
Parent: 298c686d3f
Commit: 81f0b2c14e
3 changed files with 27 additions and 40 deletions

Changed file: clang-tidy lint configuration (include/exclude path rules)

@@ -191,10 +191,9 @@ include_patterns = [
'c10/**/*.h',
# Enable coverage of headers in torch/csrc and excluding sub-directories for now.
'torch/csrc/*.h',
'torch/csrc/autograd/**/*.h',
'torch/csrc/*.cpp',
'torch/csrc/**/*.h',
'torch/csrc/**/*.cpp',
'torch/csrc/cuda/**/*.h',
'torch/csrc/cuda/**/*.cpp',
]
exclude_patterns = [
# The negative filters below are to exclude files that include onnx_pb.h or
@@ -223,7 +222,6 @@ exclude_patterns = [
'third_party/**/*',
'torch/csrc/api/**',
'torch/csrc/autograd/generated/**',
'torch/csrc/autograd/profiler_legacy.cpp',
'torch/csrc/dynamo/*',
'torch/csrc/distributed/**/*',
'torch/csrc/inductor/**/*',

Changed file: torch/csrc/autograd/profiler_legacy.cpp

@@ -11,7 +11,6 @@
#include <fstream>
#include <mutex>
#include <sstream>
#include <string>
#include <vector>
@@ -23,9 +22,7 @@
#include <iostream>
namespace torch {
namespace autograd {
namespace profiler {
namespace torch::autograd::profiler {
// We decompose the profiler logic into the following components:
//
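
The hunk above collapses the three separate namespace blocks into a single C++17 nested namespace definition, the form suggested by clang-tidy's modernize-concat-nested-namespaces check. A minimal standalone sketch of the before/after shape; answer_old and answer_new are placeholder functions, not part of the profiler:

    // compiles as-is with -std=c++17
    // Pre-C++17 spelling: every level opened and closed separately.
    namespace torch { namespace autograd { namespace profiler {
    inline int answer_old() { return 42; }
    }}} // namespace torch::autograd::profiler

    // C++17 nested namespace definition: the same namespace, one block, one brace.
    namespace torch::autograd::profiler {
    inline int answer_new() { return answer_old(); } // same enclosing namespace
    } // namespace torch::autograd::profiler

    int main() { return torch::autograd::profiler::answer_new() == 42 ? 0 : 1; }
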
@@ -123,7 +120,6 @@ using torch::profiler::impl::ActiveProfilerType;
using torch::profiler::impl::ProfilerStateBase;
struct ProfilerLegacyThreadLocalState : public ProfilerStateBase {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
explicit ProfilerLegacyThreadLocalState(
const torch::profiler::impl::ProfilerConfig& config)
: ProfilerStateBase(config), remoteProfiledEvents_{c10::nullopt} {}
@@ -166,15 +162,13 @@ struct ProfilerLegacyThreadLocalState : public ProfilerStateBase {
}
protected:
RangeEventList& getEventList(int64_t thread_id = -1);
RangeEventList& getEventList(
std::optional<uint64_t> thread_id = std::nullopt);
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
std::mutex state_mutex_;
std::unordered_map<uint64_t, std::shared_ptr<RangeEventList>>
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
event_lists_map_;
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
c10::optional<std::vector<std::vector<LegacyEvent>>> remoteProfiledEvents_;
};
@@ -316,18 +310,18 @@ void ProfilerLegacyThreadLocalState::reportMemoryUsage(
}
RangeEventList& ProfilerLegacyThreadLocalState::getEventList(
int64_t thread_id) {
if (thread_id < 0) {
std::optional<uint64_t> thread_id) {
if (!thread_id.has_value()) {
thread_id = at::RecordFunction::currentThreadId();
}
RangeEventList* list_ptr = nullptr;
std::lock_guard<std::mutex> guard(state_mutex_);
auto it = event_lists_map_.find(thread_id);
auto it = event_lists_map_.find(thread_id.value());
if (it != event_lists_map_.end()) {
list_ptr = it->second.get();
} else {
auto event_list = std::make_shared<RangeEventList>();
event_lists_map_[thread_id] = event_list;
event_lists_map_[thread_id.value()] = event_list;
list_ptr = event_list.get();
}
return *list_ptr;
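
The two hunks above replace the signed -1 sentinel (int64_t thread_id = -1) with std::optional<uint64_t>, so "no thread id supplied" is expressed in the type system rather than as a magic negative value that forced a signed parameter for an unsigned id. A minimal sketch of the same pattern; counterFor and currentThreadIdStub are hypothetical stand-ins for getEventList and the real thread-id query:

    #include <cstdint>
    #include <iostream>
    #include <optional>
    #include <unordered_map>

    namespace {
    std::unordered_map<uint64_t, int> counters;

    uint64_t currentThreadIdStub() { return 7; } // stand-in for the current-thread-id call

    // std::nullopt means "use the current thread"; no -1 sentinel needed.
    int& counterFor(std::optional<uint64_t> thread_id = std::nullopt) {
      if (!thread_id.has_value()) {
        thread_id = currentThreadIdStub();
      }
      return counters[*thread_id];
    }
    } // namespace

    int main() {
      counterFor() = 1;             // current thread
      counterFor(uint64_t{3}) = 5;  // explicit thread id
      std::cout << counters.size() << '\n'; // prints 2
    }
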
@@ -493,7 +487,7 @@ void LegacyEvent::record(bool record_cuda) {
" elements to reconstruct LegacyEvent.");
// Reconstruct input shapes from ivalues.
auto shapeListIValue = ivalues.get(EventIValueIdx::SHAPES);
const auto& shapeListIValue = ivalues.get(EventIValueIdx::SHAPES);
TORCH_INTERNAL_ASSERT(
shapeListIValue.isList(),
"Expected profiler shapes IValue to contain type c10::impl::GenericList.");
@@ -503,7 +497,7 @@ void LegacyEvent::record(bool record_cuda) {
shapes.reserve(shapeList.size());
for (const auto i : c10::irange(shapeList.size())) {
std::vector<int64_t> s;
auto shapeIValue = shapeList.get(i);
const auto& shapeIValue = shapeList.get(i);
TORCH_INTERNAL_ASSERT(
shapeIValue.isList(),
"Expected each profiler shape element to contain shapes of type c10::impl::GenericList.")
@@ -529,8 +523,10 @@ void LegacyEvent::record(bool record_cuda) {
ivalues.get(EventIValueIdx::CPU_NS).toInt(), // cpu_ns
ivalues.get(EventIValueIdx::CUDA_RECORDED).toBool(), // was cuda recorded
ivalues.get(EventIValueIdx::CUDA_MEM_USAGE).toInt(), // cuda memory usage
ivalues.get(EventIValueIdx::CUDA_DEVICE).toInt(), // device
ivalues.get(EventIValueIdx::CUDA_US).toInt() // cuda_us
c10::DeviceIndex(
ivalues.get(EventIValueIdx::CUDA_DEVICE).toInt()), // device
static_cast<double>(
ivalues.get(EventIValueIdx::CUDA_US).toInt()) // cuda_us
);
return evt;
}
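
In the hunk above, the deserialized integers are no longer passed through implicit conversions: the device index is narrowed explicitly via c10::DeviceIndex and cuda_us is converted explicitly with static_cast<double>, so the *-narrowing-conversions checks see a deliberate conversion rather than a silent one. A standalone sketch of the idea; DeviceIndexLike and consume are placeholders, and treating the device index as int8_t is an assumption for illustration:

    #include <cstdint>
    #include <iostream>

    using DeviceIndexLike = int8_t; // assumption: stands in for a narrow device-index type

    void consume(DeviceIndexLike device, double cuda_us) {
      std::cout << int{device} << ' ' << cuda_us << '\n';
    }

    int main() {
      int64_t raw_device = 1;       // what an integer IValue hands back
      int64_t raw_cuda_us = 123456;

      // Implicit int64_t -> int8_t and int64_t -> double conversions would be
      // flagged as narrowing; the explicit casts document the intent.
      consume(DeviceIndexLike(raw_device), static_cast<double>(raw_cuda_us));
    }
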
@@ -669,9 +665,9 @@ RecordProfile::~RecordProfile() {
}
processEvents(events);
} catch (const std::exception& e) {
LOG(ERROR) << e.what() << std::endl;
LOG(ERROR) << e.what() << '\n';
} catch (...) {
LOG(ERROR) << "Unknown error" << std::endl;
LOG(ERROR) << "Unknown error" << '\n';
}
}
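
The hunk above swaps std::endl for '\n' in the error-logging paths, which is what clang-tidy's performance-avoid-endl check asks for: std::endl writes a newline and forces a flush, while '\n' only writes the newline and lets the stream (or the logging macro wrapping it) decide when to flush. A tiny illustration:

    #include <iostream>

    int main() {
      std::cout << "with endl" << std::endl; // newline + explicit flush every time
      std::cout << "without endl" << '\n';   // newline only
      std::cout << std::flush;               // flush explicitly only where it matters
    }
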
@@ -679,6 +675,4 @@ void RecordProfile::processEvents(const std::vector<LegacyEvent*>& events) {
writeProfilerEventsToStream(out_, events);
}
} // namespace profiler
} // namespace autograd
} // namespace torch
} // namespace torch::autograd::profiler

Changed file: torch/csrc/autograd/profiler_legacy.h

@@ -27,7 +27,6 @@ enum class C10_API_ENUM EventKind : uint16_t {
// To be deprecated, once we switch to Kineto profiling
struct TORCH_API LegacyEvent {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
LegacyEvent(
EventKind kind,
at::StringView name,
@@ -35,7 +34,7 @@ struct TORCH_API LegacyEvent {
bool record_cuda,
at::RecordFunctionHandle handle = 0,
std::vector<std::vector<int64_t>>&& shapes = {},
int node_id = -1,
int64_t node_id = -1,
bool is_async = false)
: name_(std::move(name)),
kind_(kind),
@@ -48,14 +47,13 @@ struct TORCH_API LegacyEvent {
}
// Constructor to be used in conjunction with LegacyEvent::fromIValue.
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
LegacyEvent(
EventKind kind,
at::StringView name,
uint16_t thread_id,
at::RecordFunctionHandle handle,
std::vector<std::vector<int64_t>>&& shapes,
int node_id,
int64_t node_id,
bool is_remote,
int64_t cpu_memory_usage,
int64_t cpu_ns,
@@ -123,7 +121,6 @@ struct TORCH_API LegacyEvent {
}
double cpuElapsedUs(const LegacyEvent& e) const {
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers)
return static_cast<double>(e.cpu_ns_ - cpu_ns_) / (1000.0);
}
@@ -170,12 +167,12 @@ struct TORCH_API LegacyEvent {
}
// Node ID corresponding to this event.
int nodeId() const {
int64_t nodeId() const {
return node_id_;
}
// Set Node ID on this event.
void setNodeId(int node_id) {
void setNodeId(int64_t node_id) {
node_id_ = node_id;
}
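
This hunk, together with the constructor hunks earlier in the header and the node_id_ member in the hunk below, widens the node id from int to int64_t end to end, so the parameter, getter, setter, and stored member all share one width and no implicit narrowing occurs along the path. A small sketch of why mixed widths trip the narrowing checks; Event here is a hypothetical stand-in, not the real LegacyEvent:

    #include <cstdint>
    #include <iostream>

    struct Event {
      void setNodeId(int64_t node_id) { node_id_ = node_id; }
      int64_t nodeId() const { return node_id_; }
     private:
      int64_t node_id_ = -1;
    };

    int main() {
      int64_t distributed_node = 3'000'000'000; // does not fit in a 32-bit int
      Event e;
      e.setNodeId(distributed_node); // an `int` parameter here would silently truncate
      std::cout << e.nodeId() << '\n';
    }
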
@@ -257,22 +254,22 @@ struct TORCH_API LegacyEvent {
at::StringView name_;
EventKind kind_;
uint64_t thread_id_;
uint64_t fwd_thread_id_;
uint64_t fwd_thread_id_{0};
at::RecordFunctionHandle handle_{0};
std::vector<std::vector<int64_t>> shapes_;
int64_t cpu_memory_usage_ = 0;
int64_t cuda_memory_usage_ = 0;
c10::DeviceIndex device_ = -1;
torch::profiler::impl::ProfilerVoidEventStub cuda_event = nullptr;
int node_id_ = 0;
int64_t node_id_ = 0;
bool is_remote_ = false;
int64_t cuda_us_ = -1;
int64_t sequence_nr_ = -1;
bool is_async_ = false;
std::vector<std::string> stack_;
uint8_t scope_;
uint64_t correlation_id_;
uint8_t scope_{0};
uint64_t correlation_id_{0};
// Extra arguments for computing op flops
std::unordered_map<std::string, c10::IValue> extra_args_;
uint64_t flops_ = 0;
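
The hunk above gives fwd_thread_id_, scope_, and correlation_id_ in-class default initializers ({0}), matching the members that already had = 0 defaults. Once every member is default-initialized, the constructors no longer need the cppcoreguidelines-pro-type-member-init suppressions removed earlier in this file. A compact sketch of the pattern with a hypothetical Stats struct:

    #include <cstdint>
    #include <iostream>

    struct Stats {
      // Without initializers these would be indeterminate after `Stats s;`,
      // and any constructor that skipped them would be flagged by
      // cppcoreguidelines-pro-type-member-init.
      uint64_t thread_id{0};
      uint8_t scope{0};
      int64_t memory_usage = 0; // `= 0` and `{0}` are equivalent here
    };

    int main() {
      Stats s; // every field has a well-defined value
      std::cout << s.thread_id << ' ' << int{s.scope} << ' ' << s.memory_usage << '\n';
    }
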
@@ -282,7 +279,6 @@ struct TORCH_API LegacyEvent {
// a std::vector resize from taking a large amount of time inside
// a profiling event
struct RangeEventList {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,modernize-use-equals-default)
RangeEventList() {
events_.reserve(kReservedCapacity);
}
@@ -385,8 +381,7 @@ struct TORCH_API TLSLegacyProfilerGuard {
c10::optional<ProfilerDisableOptions> profilerDisableOptions =
c10::nullopt)
: cb_(std::move(resultCallback)),
// NOLINTNEXTLINE(performance-move-const-arg)
profilerDisableOptions_(std::move(profilerDisableOptions)) {
profilerDisableOptions_(profilerDisableOptions) {
enableProfilerLegacy(cfg);
}
~TLSLegacyProfilerGuard() {
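
The final hunk drops a std::move that only existed to be excused by NOLINT(performance-move-const-arg): if the optional options object is trivially copyable, "moving" it performs exactly the same copy, so the cast is noise. A minimal illustration with std::optional and a hypothetical Options struct; whether the real ProfilerDisableOptions is trivially copyable is an assumption here:

    #include <iostream>
    #include <optional>
    #include <type_traits>

    // Hypothetical options struct: a couple of flags, trivially copyable.
    struct Options {
      bool cleanup = true;
      bool consolidate = false;
    };

    struct Guard {
      // For a trivially copyable payload, std::move(opts) would do exactly the
      // same byte-wise copy as opts, which is why performance-move-const-arg
      // treats the cast as misleading.
      explicit Guard(std::optional<Options> opts) : opts_(opts) {}
      std::optional<Options> opts_;
    };

    int main() {
      static_assert(std::is_trivially_copyable_v<std::optional<Options>>);
      Guard g(Options{});
      std::cout << g.opts_->cleanup << '\n';
    }
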