mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
This is a follow-up to #164912 that marks unused C++ parameters to improve code readability. Pull Request resolved: https://github.com/pytorch/pytorch/pull/165121 Approved by: https://github.com/Skylion007
102 lines
2.5 KiB
C++
102 lines
2.5 KiB
C++
#pragma once
|
|
|
|
#include <array>
|
|
#include <cstdint>
|
|
#include <memory>
|
|
#include <stack>
|
|
#include <string>
|
|
#include <unordered_map>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
#include <torch/csrc/profiler/events.h>
|
|
|
|
#include <c10/util/Exception.h>
|
|
|
|
namespace torch::profiler::impl::linux_perf {
|
|
|
|
/*
 * Maximum number of events supported.
 *
 * This stems from the hardware limitation on CPU performance counters, and the
 * fact that we don't support time multiplexing just yet.
 * Time multiplexing involves scaling the counter values proportional to
 * the enabled and running time, or running the workload multiple times.
 *
 * Declared `inline` so that every translation unit including this header
 * refers to one shared entity instead of its own internal-linkage copy.
 */
inline constexpr uint8_t MAX_EVENTS = 4;
|
|
|
|
/*
 * One reading of a perf event: the counter value plus the two time fields.
 *
 * Every field defaults to zero so that a default-constructed PerfCounter
 * (`PerfCounter c;`) never exposes indeterminate values. The struct remains
 * an aggregate, so `PerfCounter{a, b, c}` and `PerfCounter{}` still work.
 */
struct PerfCounter {
  uint64_t value{0}; /* The value of the event */
  uint64_t time_enabled{0}; /* for TIME_ENABLED */
  uint64_t time_running{0}; /* for TIME_RUNNING */
};
|
|
|
|
/*
 * Basic perf event handler for Android and Linux.
 *
 * Pairs an event name with an integer descriptor (fd_). The type is move-only:
 * copy construction and copy assignment are deleted. fd_ is -1 until something
 * assigns it (presumably Init(), which is defined out of line).
 */
class PerfEvent {
 public:
  /* Copies `name` into the event. Taking a const reference accepts const
   * strings and temporaries in addition to the mutable lvalues that the
   * previous `std::string&` signature accepted. */
  explicit PerfEvent(const std::string& name) : name_(name) {}

  PerfEvent(const PerfEvent& other) = delete;
  PerfEvent& operator=(const PerfEvent&) = delete;

  /* Move-assigns by exchanging descriptors rather than overwriting ours.
   * Overwriting would drop whatever descriptor this object already held with
   * nothing left referring to it. After the swap `other` carries it instead,
   * so it can be disposed of when `other` is destroyed (assumes ~PerfEvent
   * releases fd_ -- its definition is not in this header; confirm).
   * When this object holds no descriptor (fd_ == -1), `other` ends up with
   * -1, exactly as it did before this change. */
  PerfEvent& operator=(PerfEvent&& other) noexcept {
    if (this != &other) {
      std::swap(fd_, other.fd_);
      name_ = std::move(other.name_);
    }
    return *this;
  }

  /* Takes over other's name and descriptor, leaving other with fd_ == -1. */
  PerfEvent(PerfEvent&& other) noexcept
      : name_(std::move(other.name_)), fd_(std::exchange(other.fd_, -1)) {}

  ~PerfEvent();

  /* Setup perf events with the Linux Kernel, attaches perf to this process
   * using perf_event_open(2) */
  void Init();

  /* Stop incrementing hardware counters for this event */
  void Disable() const;

  /* Start counting hardware event from this point on */
  void Enable() const;

  /* Zero out the counts for this event */
  void Reset() const;

  /* Returns PerfCounter values for this event from kernel, on non supported
   * platforms this always returns zero */
  uint64_t ReadCounter() const;

 private:
  /* Name of the event */
  std::string name_;

  /* Descriptor for this event; -1 means none is held */
  int fd_ = -1;
};
|
|
|
|
class PerfProfiler {
|
|
public:
|
|
/* Configure all the events and track them as individual PerfEvent */
|
|
void Configure(std::vector<std::string>& event_names);
|
|
|
|
/* Enable events counting from here */
|
|
void Enable();
|
|
|
|
/* Disable counting and fill in the caller supplied container with delta
|
|
* calculated from the start count values since last Enable() */
|
|
void Disable(perf_counters_t& /*vals*/);
|
|
|
|
private:
|
|
uint64_t CalcDelta(uint64_t start, uint64_t end) const;
|
|
void StartCounting() const;
|
|
void StopCounting() const;
|
|
|
|
std::vector<PerfEvent> events_;
|
|
std::stack<perf_counters_t> start_values_;
|
|
};
|
|
} // namespace torch::profiler::impl::linux_perf
|