mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[Clang-tidy header][19/N] Enable clang-tidy on torch/csrc/autograd/profiler_legacy.* (#120552)
This PR enables clang-tidy on torch/csrc/autograd/profiler_legacy.* and cleans up some of the clang-tidy path rules. Pull Request resolved: https://github.com/pytorch/pytorch/pull/120552 Approved by: https://github.com/Skylion007
This commit is contained in:
@ -191,10 +191,9 @@ include_patterns = [
|
||||
'c10/**/*.h',
|
||||
# Enable coverage of headers in torch/csrc, excluding sub-directories for now.
|
||||
'torch/csrc/*.h',
|
||||
'torch/csrc/autograd/**/*.h',
|
||||
'torch/csrc/*.cpp',
|
||||
'torch/csrc/**/*.h',
|
||||
'torch/csrc/**/*.cpp',
|
||||
'torch/csrc/cuda/**/*.h',
|
||||
'torch/csrc/cuda/**/*.cpp',
|
||||
]
|
||||
exclude_patterns = [
|
||||
# The negative filters below are to exclude files that include onnx_pb.h or
|
||||
@ -223,7 +222,6 @@ exclude_patterns = [
|
||||
'third_party/**/*',
|
||||
'torch/csrc/api/**',
|
||||
'torch/csrc/autograd/generated/**',
|
||||
'torch/csrc/autograd/profiler_legacy.cpp',
|
||||
'torch/csrc/dynamo/*',
|
||||
'torch/csrc/distributed/**/*',
|
||||
'torch/csrc/inductor/**/*',
|
||||
|
@ -11,7 +11,6 @@
|
||||
|
||||
#include <fstream>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
@ -23,9 +22,7 @@
|
||||
|
||||
#include <iostream>
|
||||
|
||||
namespace torch {
|
||||
namespace autograd {
|
||||
namespace profiler {
|
||||
namespace torch::autograd::profiler {
|
||||
|
||||
// We decompose the profiler logic into the following components:
|
||||
//
|
||||
@ -123,7 +120,6 @@ using torch::profiler::impl::ActiveProfilerType;
|
||||
using torch::profiler::impl::ProfilerStateBase;
|
||||
|
||||
struct ProfilerLegacyThreadLocalState : public ProfilerStateBase {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
explicit ProfilerLegacyThreadLocalState(
|
||||
const torch::profiler::impl::ProfilerConfig& config)
|
||||
: ProfilerStateBase(config), remoteProfiledEvents_{c10::nullopt} {}
|
||||
@ -166,15 +162,13 @@ struct ProfilerLegacyThreadLocalState : public ProfilerStateBase {
|
||||
}
|
||||
|
||||
protected:
|
||||
RangeEventList& getEventList(int64_t thread_id = -1);
|
||||
RangeEventList& getEventList(
|
||||
std::optional<uint64_t> thread_id = std::nullopt);
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
|
||||
std::mutex state_mutex_;
|
||||
std::unordered_map<uint64_t, std::shared_ptr<RangeEventList>>
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
|
||||
event_lists_map_;
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
|
||||
c10::optional<std::vector<std::vector<LegacyEvent>>> remoteProfiledEvents_;
|
||||
};
|
||||
|
||||
@ -316,18 +310,18 @@ void ProfilerLegacyThreadLocalState::reportMemoryUsage(
|
||||
}
|
||||
|
||||
RangeEventList& ProfilerLegacyThreadLocalState::getEventList(
|
||||
int64_t thread_id) {
|
||||
if (thread_id < 0) {
|
||||
std::optional<uint64_t> thread_id) {
|
||||
if (!thread_id.has_value()) {
|
||||
thread_id = at::RecordFunction::currentThreadId();
|
||||
}
|
||||
RangeEventList* list_ptr = nullptr;
|
||||
std::lock_guard<std::mutex> guard(state_mutex_);
|
||||
auto it = event_lists_map_.find(thread_id);
|
||||
auto it = event_lists_map_.find(thread_id.value());
|
||||
if (it != event_lists_map_.end()) {
|
||||
list_ptr = it->second.get();
|
||||
} else {
|
||||
auto event_list = std::make_shared<RangeEventList>();
|
||||
event_lists_map_[thread_id] = event_list;
|
||||
event_lists_map_[thread_id.value()] = event_list;
|
||||
list_ptr = event_list.get();
|
||||
}
|
||||
return *list_ptr;
|
||||
@ -493,7 +487,7 @@ void LegacyEvent::record(bool record_cuda) {
|
||||
" elements to reconstruct LegacyEvent.");
|
||||
|
||||
// Reconstruct input shapes from ivalues.
|
||||
auto shapeListIValue = ivalues.get(EventIValueIdx::SHAPES);
|
||||
const auto& shapeListIValue = ivalues.get(EventIValueIdx::SHAPES);
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
shapeListIValue.isList(),
|
||||
"Expected profiler shapes IValue to contain type c10::impl::GenericList.");
|
||||
@ -503,7 +497,7 @@ void LegacyEvent::record(bool record_cuda) {
|
||||
shapes.reserve(shapeList.size());
|
||||
for (const auto i : c10::irange(shapeList.size())) {
|
||||
std::vector<int64_t> s;
|
||||
auto shapeIValue = shapeList.get(i);
|
||||
const auto& shapeIValue = shapeList.get(i);
|
||||
TORCH_INTERNAL_ASSERT(
|
||||
shapeIValue.isList(),
|
||||
"Expected each profiler shape element to contain shapes of type c10::impl::GenericList.")
|
||||
@ -529,8 +523,10 @@ void LegacyEvent::record(bool record_cuda) {
|
||||
ivalues.get(EventIValueIdx::CPU_NS).toInt(), // cpu_ns
|
||||
ivalues.get(EventIValueIdx::CUDA_RECORDED).toBool(), // was cuda recorded
|
||||
ivalues.get(EventIValueIdx::CUDA_MEM_USAGE).toInt(), // cuda memory usage
|
||||
ivalues.get(EventIValueIdx::CUDA_DEVICE).toInt(), // device
|
||||
ivalues.get(EventIValueIdx::CUDA_US).toInt() // cuda_us
|
||||
c10::DeviceIndex(
|
||||
ivalues.get(EventIValueIdx::CUDA_DEVICE).toInt()), // device
|
||||
static_cast<double>(
|
||||
ivalues.get(EventIValueIdx::CUDA_US).toInt()) // cuda_us
|
||||
);
|
||||
return evt;
|
||||
}
|
||||
@ -669,9 +665,9 @@ RecordProfile::~RecordProfile() {
|
||||
}
|
||||
processEvents(events);
|
||||
} catch (const std::exception& e) {
|
||||
LOG(ERROR) << e.what() << std::endl;
|
||||
LOG(ERROR) << e.what() << '\n';
|
||||
} catch (...) {
|
||||
LOG(ERROR) << "Unknown error" << std::endl;
|
||||
LOG(ERROR) << "Unknown error" << '\n';
|
||||
}
|
||||
}
|
||||
|
||||
@ -679,6 +675,4 @@ void RecordProfile::processEvents(const std::vector<LegacyEvent*>& events) {
|
||||
writeProfilerEventsToStream(out_, events);
|
||||
}
|
||||
|
||||
} // namespace profiler
|
||||
} // namespace autograd
|
||||
} // namespace torch
|
||||
} // namespace torch::autograd::profiler
|
||||
|
@ -27,7 +27,6 @@ enum class C10_API_ENUM EventKind : uint16_t {
|
||||
|
||||
// To be deprecated, once we switch to Kineto profiling
|
||||
struct TORCH_API LegacyEvent {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
LegacyEvent(
|
||||
EventKind kind,
|
||||
at::StringView name,
|
||||
@ -35,7 +34,7 @@ struct TORCH_API LegacyEvent {
|
||||
bool record_cuda,
|
||||
at::RecordFunctionHandle handle = 0,
|
||||
std::vector<std::vector<int64_t>>&& shapes = {},
|
||||
int node_id = -1,
|
||||
int64_t node_id = -1,
|
||||
bool is_async = false)
|
||||
: name_(std::move(name)),
|
||||
kind_(kind),
|
||||
@ -48,14 +47,13 @@ struct TORCH_API LegacyEvent {
|
||||
}
|
||||
|
||||
// Constructor to be used in conjunction with LegacyEvent::fromIValue.
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
|
||||
LegacyEvent(
|
||||
EventKind kind,
|
||||
at::StringView name,
|
||||
uint16_t thread_id,
|
||||
at::RecordFunctionHandle handle,
|
||||
std::vector<std::vector<int64_t>>&& shapes,
|
||||
int node_id,
|
||||
int64_t node_id,
|
||||
bool is_remote,
|
||||
int64_t cpu_memory_usage,
|
||||
int64_t cpu_ns,
|
||||
@ -123,7 +121,6 @@ struct TORCH_API LegacyEvent {
|
||||
}
|
||||
|
||||
double cpuElapsedUs(const LegacyEvent& e) const {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions,cppcoreguidelines-avoid-magic-numbers)
|
||||
return static_cast<double>(e.cpu_ns_ - cpu_ns_) / (1000.0);
|
||||
}
|
||||
|
||||
@ -170,12 +167,12 @@ struct TORCH_API LegacyEvent {
|
||||
}
|
||||
|
||||
// Node ID corresponding to this event.
|
||||
int nodeId() const {
|
||||
int64_t nodeId() const {
|
||||
return node_id_;
|
||||
}
|
||||
|
||||
// Set Node ID on this event.
|
||||
void setNodeId(int node_id) {
|
||||
void setNodeId(int64_t node_id) {
|
||||
node_id_ = node_id;
|
||||
}
|
||||
|
||||
@ -257,22 +254,22 @@ struct TORCH_API LegacyEvent {
|
||||
at::StringView name_;
|
||||
EventKind kind_;
|
||||
uint64_t thread_id_;
|
||||
uint64_t fwd_thread_id_;
|
||||
uint64_t fwd_thread_id_{0};
|
||||
at::RecordFunctionHandle handle_{0};
|
||||
std::vector<std::vector<int64_t>> shapes_;
|
||||
int64_t cpu_memory_usage_ = 0;
|
||||
int64_t cuda_memory_usage_ = 0;
|
||||
c10::DeviceIndex device_ = -1;
|
||||
torch::profiler::impl::ProfilerVoidEventStub cuda_event = nullptr;
|
||||
int node_id_ = 0;
|
||||
int64_t node_id_ = 0;
|
||||
bool is_remote_ = false;
|
||||
int64_t cuda_us_ = -1;
|
||||
int64_t sequence_nr_ = -1;
|
||||
bool is_async_ = false;
|
||||
|
||||
std::vector<std::string> stack_;
|
||||
uint8_t scope_;
|
||||
uint64_t correlation_id_;
|
||||
uint8_t scope_{0};
|
||||
uint64_t correlation_id_{0};
|
||||
// Extra arguments for computing op flops
|
||||
std::unordered_map<std::string, c10::IValue> extra_args_;
|
||||
uint64_t flops_ = 0;
|
||||
@ -282,7 +279,6 @@ struct TORCH_API LegacyEvent {
|
||||
// a std::vector resize from taking a large amount of time inside
|
||||
// a profiling event
|
||||
struct RangeEventList {
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init,modernize-use-equals-default)
|
||||
RangeEventList() {
|
||||
events_.reserve(kReservedCapacity);
|
||||
}
|
||||
@ -385,8 +381,7 @@ struct TORCH_API TLSLegacyProfilerGuard {
|
||||
c10::optional<ProfilerDisableOptions> profilerDisableOptions =
|
||||
c10::nullopt)
|
||||
: cb_(std::move(resultCallback)),
|
||||
// NOLINTNEXTLINE(performance-move-const-arg)
|
||||
profilerDisableOptions_(std::move(profilerDisableOptions)) {
|
||||
profilerDisableOptions_(profilerDisableOptions) {
|
||||
enableProfilerLegacy(cfg);
|
||||
}
|
||||
~TLSLegacyProfilerGuard() {
|
||||
|
Reference in New Issue
Block a user