Expose operator execution statistics to the Python frontend.
Summary: To expose operator execution statistics in Python, the profiling measurements collected by the ProfDAGNet class are leveraged. In the current implementation, a new operator is defined that outputs the statistics as a protobuf message. On the frontend, OperatorStatsContainer works as a wrapper that prints the ProfDAGNet statistics.
Differential Revision: D4923009
fbshipit-source-id: 18a6d76a405ef277a3fca7a312609051cf943207
Committed by: Yangqing Jia
Parent: 09bb91022a
Commit: 3b0069a014
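As a rough usage sketch (not part of this commit), the pieces introduced here might be wired together from Python as follows; the toy net, the blob names, and the handling of the fetched string blob are illustrative assumptions:

    from caffe2.python import core, workspace
    from caffe2.contrib.perf_contbld.utils import OperatorStatsContainer

    # Hypothetical toy net; any net works as long as its type is "prof_dag".
    net = core.Net("profiled_net")
    net.Proto().type = "prof_dag"
    net.ConstantFill([], "x", shape=[64, 64], value=1.0)
    net.Relu("x", "y")

    workspace.CreateNet(net)
    # ProfDAGNet ignores the first run, so run the net at least twice.
    for _ in range(10):
        workspace.RunNet(net.Proto().name)

    # The new operator serializes the per-operator-type stats into a string
    # blob; "op_stats" is an arbitrary blob name chosen for this sketch.
    stats_op = core.CreateOperator(
        "GetProfDagStats", [], ["op_stats"], net_name=net.Proto().name)
    workspace.RunOperatorOnce(stats_op)
    serialized = workspace.FetchBlob("op_stats")[0]  # assumes the 1-element string tensor fetches as an array

    OperatorStatsContainer(serialized).Print()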
caffe2/contrib/perf_contbld/utils.py (new file, 24 lines)
@@ -0,0 +1,24 @@

## @package utils
# Module caffe2.contrib.perf_contbld.utils
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals


from caffe2.proto import prof_dag_pb2


class OperatorStatsContainer():
    '''
    This class works as a wrapper to print ProfDAGNet statistics
    '''
    def __init__(self, stats_proto):
        self.op_stats = prof_dag_pb2.ProfDAGProtos()
        self.op_stats.ParseFromString(stats_proto)

    def Print(self):
        print("Time per operator type:")
        for stat in self.op_stats.stats:
            print("{:12.6f} ms/iter [{:10.6f} ms/iter ]\t{}".format(
                stat.mean, stat.stddev, stat.name
            ))
caffe2/contrib/prof/prof_dag_net.cc (modified)
@@ -1,150 +1,149 @@

Previously this file declared the Stats struct and the whole ProfDAGNet class inline inside an anonymous namespace. The declaration now lives in the new header caffe2/contrib/prof/prof_dag_net.h; this file keeps the out-of-line definitions, adds the ProtoMsg() and GetOperatorStats() methods used by the new GetProfDagStats operator, and logs per-operator timings at VLOG(1) instead of LOG(INFO). New contents:

#include "prof_dag_net.h"

#include <cmath>

#include "caffe2/core/operator.h"
#include "caffe2/core/timer.h"

namespace caffe2 {

ProfDAGNet::ProfDAGNet(const NetDef& net_def, Workspace* ws)
    : DAGNetBase(net_def, ws), time_per_op_(operator_nodes_.size()) {
  VLOG(1) << "Constructing ProfDAGNet " << name_;
}

ProfDAGNet::~ProfDAGNet() {
  VLOG(1) << "Closing ProfDAGNet " << name_;
  if (runs_ <= 1) {
    LOG(INFO) << "Insufficient runs to produce meaningful data.";
    return;
  }
  PrintStats();
}

bool ProfDAGNet::Run() {
  runs_++;

  // don't collect statistics from first run
  if (runs_ <= 1) {
    return DAGNetBase::Run();
  }

  CAFFE_ENFORCE(
      time_per_op_.size() == operator_nodes_.size(),
      "Data collected for ",
      time_per_op_.size(),
      " ops, expected ",
      operator_nodes_.size(),
      " ops.");

  // create a copy and later collect the differences
  vector<Stats> time_per_op_run(time_per_op_);
  bool success = DAGNetBase::Run();

  // aggregate this run's stats per operator type
  CaffeMap<string, float> time_per_op_type_run;
  for (int idx = 0; idx < operator_nodes_.size(); idx++) {
    const auto& node = operator_nodes_[idx];
    const string& op_type = node.operator_->def().type();
    time_per_op_type_run[op_type] +=
        time_per_op_[idx].sum - time_per_op_run[idx].sum;
  }

  for (const auto& item : time_per_op_type_run) {
    time_per_op_type_[item.first].sum += item.second;
    time_per_op_type_[item.first].sqrsum += item.second * item.second;
  }

  return success;
}

ProfDAGProto ProfDAGNet::ProtoMsg(std::pair<std::string, Stats> op_stat) const {
  ProfDAGProto message;
  float mean = op_stat.second.sum / (runs_ - 1);
  float stddev = std::sqrt(op_stat.second.sqrsum / (runs_ - 1) - mean * mean);
  message.set_mean(mean);
  message.set_stddev(stddev);
  message.set_name(op_stat.first);
  return message;
}

ProfDAGProtos ProfDAGNet::GetOperatorStats() {
  ProfDAGProtos prof_dag_protos;
  for (auto& item : time_per_op_type_) {
    auto buf = prof_dag_protos.add_stats();
    buf->CopyFrom(ProtoMsg(item));
  }
  return prof_dag_protos;
}

bool ProfDAGNet::RunAt(const std::vector<int>& chain) {
  bool success = true;
  Timer timer;
  for (const auto idx : chain) {
    // don't collect metrics from first run
    if (runs_ <= 1) {
      success &= operator_nodes_[idx].operator_->Run();

    } else {
      timer.Start();
      success &= operator_nodes_[idx].operator_->Run();
      float spent = timer.MilliSeconds();

      CAFFE_ENFORCE(
          time_per_op_.size() > idx,
          "Expecting ",
          time_per_op_.size(),
          " ops, but op #",
          idx,
          " was given.");
      time_per_op_[idx].sum += spent;
      time_per_op_[idx].sqrsum += spent * spent;
    }
  }
  return success;
}

void ProfDAGNet::PrintStats() {
  CAFFE_ENFORCE(
      time_per_op_.size() == operator_nodes_.size(),
      "Data collected for ",
      time_per_op_.size(),
      " ops, expected ",
      operator_nodes_.size(),
      " ops.");

  CAFFE_ENFORCE(runs_ > 1, "# of runs: ", runs_, ", expected > 1.");
  int measured_runs = runs_ - 1;

  for (int idx = 0; idx < operator_nodes_.size(); idx++) {
    auto& node = operator_nodes_[idx];
    const string& op_type = node.operator_->def().type();
    const string& print_name = node.operator_->def().name().size()
        ? node.operator_->def().name()
        : (node.operator_->def().output_size() ? node.operator_->def().output(0)
                                               : "NO_OUTPUT");

    float mean = time_per_op_[idx].sum / measured_runs;
    float stddev =
        std::sqrt(time_per_op_[idx].sqrsum / measured_runs - mean * mean);
    VLOG(1) << "Op #" << idx << " (" << print_name << ", " << op_type << ") "
            << mean << " ms/iter (" << stddev << " ms/iter)";
  }

  LOG(INFO) << "Time per operator type:";
  for (const auto& item : time_per_op_type_) {
    float mean = item.second.sum / measured_runs;
    float stddev = std::sqrt(item.second.sqrsum / measured_runs - mean * mean);
    LOG(INFO) << std::setw(10) << std::setfill(' ') << mean << " ms/iter ("
              << std::setw(10) << std::setfill(' ') << stddev << " ms/iter) "
              << item.first;
  }
}

namespace {

REGISTER_NET(prof_dag, ProfDAGNet);

} // namespace

} // namespace caffe2
caffe2/contrib/prof/prof_dag_net.h (new file, 36 lines)
@@ -0,0 +1,36 @@

#pragma once

#include "caffe2/core/net.h"
#include "caffe2/proto/prof_dag.pb.h"

namespace caffe2 {

struct Stats {
  float sum;
  float sqrsum;
};

/**
 * This net type is identical to DAGNet, except that it
 * measures the time taken for each and every operator.
 *
 * To collect statistics from stable runs, this net ignores the first run.
 * Thus, at least two runs are required for this net to print operator metrics.
 */
class ProfDAGNet : public DAGNetBase {
 public:
  ProfDAGNet(const NetDef& net_def, Workspace* ws);
  ~ProfDAGNet();
  bool Run() override;
  ProfDAGProtos GetOperatorStats();

 protected:
  bool RunAt(const std::vector<int>& chain) override;
  void PrintStats();
  ProfDAGProto ProtoMsg(std::pair<std::string, Stats> op_stat) const;
  std::vector<Stats> time_per_op_;
  CaffeMap<std::string, Stats> time_per_op_type_;
  int runs_ = 0;
};

} // namespace caffe2
caffe2/contrib/prof/prof_dag_stats_op.cc (new file, 18 lines)
@@ -0,0 +1,18 @@

#include "prof_dag_stats_op.h"

namespace caffe2 {
namespace {

REGISTER_CPU_OPERATOR(GetProfDagStats, GetProfDagStatsOp<float, CPUContext>);

OPERATOR_SCHEMA(GetProfDagStats)
    .NumInputs(0)
    .NumOutputs(1)
    .SetDoc(R"DOC(
Gets the profiling statistics.
)DOC")
    .Arg(
        "net_name",
        "(string) default to empty; describes the name of the ProfDAGNet");
} // namespace
} // namespace caffe2
caffe2/contrib/prof/prof_dag_stats_op.h (new file, 48 lines)
@@ -0,0 +1,48 @@

#ifndef CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_
#define CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_

#include "caffe2/contrib/prof/prof_dag_net.h"
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"

namespace caffe2 {

// This operator outputs the ProfDAGNet stats
template <typename T, class Context, class Engine = DefaultEngine>
class GetProfDagStatsOp final : public Operator<Context> {
 public:
  USE_OPERATOR_CONTEXT_FUNCTIONS;
  GetProfDagStatsOp(const OperatorDef& operator_def, Workspace* ws)
      : Operator<Context>(operator_def, ws),
        net_name_(
            OperatorBase::GetSingleArgument<std::string>("net_name", "")) {
    ws_ = ws;
  }
  ~GetProfDagStatsOp() {}

  bool RunOnDevice() override {
    // Read operator statistics for net_name_
    CAFFE_ENFORCE(!net_name_.empty(), "You need to provide net_name");
    auto* net = ws_->GetNet(net_name_);

    auto prof_dag_net = dynamic_cast_if_rtti<ProfDAGNet*>(net);
    CAFFE_ENFORCE(prof_dag_net);
    auto stats = prof_dag_net->GetOperatorStats();

    // Write protobuf message to the output blob
    std::string serialized_data;
    CAFFE_ENFORCE(stats.SerializeToString(&serialized_data));
    Output(0)->Resize(1);
    Output(0)->template mutable_data<std::string>()[0] = serialized_data;

    return true;
  }

 protected:
  std::string net_name_;
  Workspace* ws_;
};

} // namespace caffe2

#endif // CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_
caffe2/proto/prof_dag.proto (new file, 26 lines)
@@ -0,0 +1,26 @@

syntax = "proto2";

package caffe2;

// A few notes about the Caffe2's protobuffer convention:
// (1) Most objects are registered by their types, such as operators and nets.
//     For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extension because that used to create quite some conflicts
//     in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
//     is mainly for backward compatibility purposes but we may consider using
//     those in the future.

// Protobuf format to serialize profiler data
message ProfDAGProto {
  // The name for the operator
  required string name = 1;
  // The mean execution time
  required float mean = 2;
  // The standard deviation
  required float stddev = 3;
}

message ProfDAGProtos {
  repeated ProfDAGProto stats = 1;
}
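To illustrate the message layout above, here is a small self-contained sketch (with made-up numbers standing in for the output of GetProfDagStats) that builds a ProfDAGProtos, serializes it, and prints it through the OperatorStatsContainer wrapper added in this commit:

    from caffe2.proto import prof_dag_pb2
    from caffe2.contrib.perf_contbld.utils import OperatorStatsContainer

    # Hand-built stats; the operator type and timings are illustrative only.
    protos = prof_dag_pb2.ProfDAGProtos()
    stat = protos.stats.add()
    stat.name = "Conv"    # made-up operator type
    stat.mean = 1.25      # ms/iter, made up
    stat.stddev = 0.05    # ms/iter, made up

    # The wrapper parses the serialized bytes and prints one line per operator type.
    OperatorStatsContainer(protos.SerializeToString()).Print()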