Expose operator execution statistics to the Python frontend.

Summary: To expose operator execution statistics to Python, the profiling measurements already collected by the ProfDAGNet class are leveraged. A new operator is defined that outputs the statistics as a serialized protobuf message, and on the frontend the OperatorStatsContainer class wraps that message to print the ProfDAGNet statistics.
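For context, a minimal sketch of how this is driven from the Python side (the net and blob names below are hypothetical, and the usual caffe2.python core/workspace helpers are assumed; reading the statistics back via the new GetProfDagStats operator is sketched next to the operator schema further down):

from caffe2.python import core, workspace

# Build any net and mark it to execute with the profiling DAG net type.
net = core.Net("my_net")  # hypothetical name
net.ConstantFill([], ["x"], shape=[1], value=1.0)
net.Proto().type = "prof_dag"

workspace.CreateNet(net)
# ProfDAGNet ignores the first run, so at least two runs are needed
# before meaningful statistics are available.
for _ in range(10):
    workspace.RunNet(net.Proto().name)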

Differential Revision: D4923009

fbshipit-source-id: 18a6d76a405ef277a3fca7a312609051cf943207
Mohammad Hossain
2017-04-21 16:15:54 -07:00
committed by Yangqing Jia
parent 09bb91022a
commit 3b0069a014
6 changed files with 279 additions and 128 deletions

View File

@ -0,0 +1,24 @@
## @package utils
# Module caffe2.contrib.perf_contbld.utils
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from caffe2.proto import prof_dag_pb2
class OperatorStatsContainer():
'''
This class works as a wrapper to print ProfDAGNet statistics
'''
def __init__(self, stats_proto):
self.op_stats = prof_dag_pb2.ProfDAGProtos()
self.op_stats.ParseFromString(stats_proto)
def Print(self):
print("Time per operator type:")
for stat in self.op_stats.stats:
print("{:12.6f} ms/iter [{:10.6f} ms/iter ]\t{}".format(
stat.mean, stat.stddev, stat.name
))

View File

@ -1,150 +1,149 @@
#include "prof_dag_net.h"
#include <cmath>
#include "caffe2/core/net.h"
#include "caffe2/core/operator.h"
#include "caffe2/core/timer.h"
namespace caffe2 {
ProfDAGNet::ProfDAGNet(const NetDef& net_def, Workspace* ws)
    : DAGNetBase(net_def, ws), time_per_op_(operator_nodes_.size()) {
  VLOG(1) << "Constructing ProfDAGNet " << name_;
}

ProfDAGNet::~ProfDAGNet() {
  VLOG(1) << "Closing ProfDAGNet " << name_;
  if (runs_ <= 1) {
    LOG(INFO) << "Insufficient runs to produce meaningful data.";
    return;
  }
  PrintStats();
}

bool ProfDAGNet::Run() {
  runs_++;
  // don't collect statistics from first run
  if (runs_ <= 1) {
    return DAGNetBase::Run();
  }
  CAFFE_ENFORCE(
      time_per_op_.size() == operator_nodes_.size(),
      "Data collected for ",
      time_per_op_.size(),
      " ops, expected ",
      operator_nodes_.size(),
      " ops.");
  // create a copy and later collect the differences
  vector<Stats> time_per_op_run(time_per_op_);
  bool success = DAGNetBase::Run();
  // aggregate this run's stats per operator type
  CaffeMap<string, float> time_per_op_type_run;
  for (int idx = 0; idx < operator_nodes_.size(); idx++) {
    const auto& node = operator_nodes_[idx];
    const string& op_type = node.operator_->def().type();
    time_per_op_type_run[op_type] +=
        time_per_op_[idx].sum - time_per_op_run[idx].sum;
  }
  for (const auto& item : time_per_op_type_run) {
    time_per_op_type_[item.first].sum += item.second;
    time_per_op_type_[item.first].sqrsum += item.second * item.second;
  }
  return success;
}

ProfDAGProto ProfDAGNet::ProtoMsg(std::pair<std::string, Stats> op_stat) const {
  ProfDAGProto message;
  float mean = op_stat.second.sum / (runs_ - 1);
  float stddev = std::sqrt(op_stat.second.sqrsum / (runs_ - 1) - mean * mean);
  message.set_mean(mean);
  message.set_stddev(stddev);
  message.set_name(op_stat.first);
  return message;
}

ProfDAGProtos ProfDAGNet::GetOperatorStats() {
  ProfDAGProtos prof_dag_protos;
  for (auto& item : time_per_op_type_) {
    auto buf = prof_dag_protos.add_stats();
    buf->CopyFrom(ProtoMsg(item));
  }
  return prof_dag_protos;
}

bool ProfDAGNet::RunAt(const std::vector<int>& chain) {
  bool success = true;
  Timer timer;
  for (const auto idx : chain) {
    // don't collect metrics from first run
    if (runs_ <= 1) {
      success &= operator_nodes_[idx].operator_->Run();
    } else {
      timer.Start();
      success &= operator_nodes_[idx].operator_->Run();
      float spent = timer.MilliSeconds();
      CAFFE_ENFORCE(
          time_per_op_.size() > idx,
          "Expecting ",
          time_per_op_.size(),
          " ops, but op #",
          idx,
          " was given.");
      time_per_op_[idx].sum += spent;
      time_per_op_[idx].sqrsum += spent * spent;
    }
  }
  return success;
}

void ProfDAGNet::PrintStats() {
  CAFFE_ENFORCE(
      time_per_op_.size() == operator_nodes_.size(),
      "Data collected for ",
      time_per_op_.size(),
      " ops, expected ",
      operator_nodes_.size(),
      " ops.");
  CAFFE_ENFORCE(runs_ > 1, "# of runs: ", runs_, ", expected > 1.");
  int measured_runs = runs_ - 1;
  for (int idx = 0; idx < operator_nodes_.size(); idx++) {
    auto& node = operator_nodes_[idx];
    const string& op_type = node.operator_->def().type();
    const string& print_name = node.operator_->def().name().size()
        ? node.operator_->def().name()
        : (node.operator_->def().output_size() ? node.operator_->def().output(0)
                                               : "NO_OUTPUT");
    float mean = time_per_op_[idx].sum / measured_runs;
    float stddev =
        std::sqrt(time_per_op_[idx].sqrsum / measured_runs - mean * mean);
    VLOG(1) << "Op #" << idx << " (" << print_name << ", " << op_type << ") "
            << mean << " ms/iter (" << stddev << " ms/iter)";
  }
  LOG(INFO) << "Time per operator type:";
  for (const auto& item : time_per_op_type_) {
    float mean = item.second.sum / measured_runs;
    float stddev = std::sqrt(item.second.sqrsum / measured_runs - mean * mean);
    LOG(INFO) << std::setw(10) << std::setfill(' ') << mean << " ms/iter ("
              << std::setw(10) << std::setfill(' ') << stddev << " ms/iter) "
              << item.first;
  }
}

namespace {

REGISTER_NET(prof_dag, ProfDAGNet);

} // namespace
} // namespace caffe2

View File

@ -0,0 +1,36 @@
#pragma once
#include "caffe2/core/net.h"
#include "caffe2/proto/prof_dag.pb.h"
namespace caffe2 {
struct Stats {
float sum;
float sqrsum;
};
/**
* This net type is identical to DAGNet, except that it
* measures the time taken for each and every operator.
*
* To collect statistics from stable runs, this net ignores the first run.
* Thus, at least two runs are required for this net to print operator metrics.
*/
class ProfDAGNet : public DAGNetBase {
public:
ProfDAGNet(const NetDef& net_def, Workspace* ws);
~ProfDAGNet();
bool Run() override;
ProfDAGProtos GetOperatorStats();
protected:
bool RunAt(const std::vector<int>& chain) override;
void PrintStats();
ProfDAGProto ProtoMsg(std::pair<std::string, Stats> op_stat) const;
std::vector<Stats> time_per_op_;
CaffeMap<std::string, Stats> time_per_op_type_;
int runs_ = 0;
};
} // namespace caffe2
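For reference, a small illustrative recomputation (not part of this change) of the summary statistics that ProtoMsg and PrintStats derive from the accumulated sum and sqrsum in Stats; the first run is excluded, so only runs_ - 1 iterations are measured:

import math

def summarize(sum_, sqrsum, runs):
    # ProfDAGNet skips the first run, so only runs - 1 are measured.
    measured_runs = runs - 1
    mean = sum_ / measured_runs
    # Var(X) = E[X^2] - (E[X])^2, computed from the running sums.
    stddev = math.sqrt(sqrsum / measured_runs - mean * mean)
    return mean, stddev

# Example: timed runs of 2.0, 2.5 and 3.0 ms after the ignored first run.
mean, stddev = summarize(sum_=7.5, sqrsum=19.25, runs=4)
print("{:.6f} ms/iter ({:.6f} ms/iter)".format(mean, stddev))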

View File

@ -0,0 +1,18 @@
#include "prof_dag_stats_op.h"
namespace caffe2 {
namespace {
REGISTER_CPU_OPERATOR(GetProfDagStats, GetProfDagStatsOp<float, CPUContext>);
OPERATOR_SCHEMA(GetProfDagStats)
.NumInputs(0)
.NumOutputs(1)
.SetDoc(R"DOC(
Gets the profiling statistics.
)DOC")
.Arg(
"net_name",
"(string) default to empty; describes the name of the ProfDAGNet");
} // namespace
} // namespace caffe2
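As a rough usage sketch (the net name "my_net" and output blob "stats" are hypothetical; it assumes the ProfDAGNet has already executed at least twice, as in the earlier example):

from caffe2.python import core, workspace
from caffe2.contrib.perf_contbld.utils import OperatorStatsContainer

# GetProfDagStats takes no inputs and writes a single output blob that
# holds the serialized ProfDAGProtos message for the named net.
stats_op = core.CreateOperator(
    "GetProfDagStats", [], ["stats"], net_name="my_net"
)
workspace.RunOperatorOnce(stats_op)

# Fetch the serialized string from the output blob and print the stats.
serialized = workspace.FetchBlob("stats")[0]
OperatorStatsContainer(serialized).Print()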

View File

@ -0,0 +1,48 @@
#ifndef CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_
#define CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_
#include "caffe2/contrib/prof/prof_dag_net.h"
#include "caffe2/core/context.h"
#include "caffe2/core/operator.h"
namespace caffe2 {
// This operator outputs the ProfDAGNet stats
template <typename T, class Context, class Engine = DefaultEngine>
class GetProfDagStatsOp final : public Operator<Context> {
public:
USE_OPERATOR_CONTEXT_FUNCTIONS;
GetProfDagStatsOp(const OperatorDef& operator_def, Workspace* ws)
: Operator<Context>(operator_def, ws),
net_name_(
OperatorBase::GetSingleArgument<std::string>("net_name", "")) {
ws_ = ws;
}
~GetProfDagStatsOp() {}
bool RunOnDevice() override {
// Read operator statistics for net_name_
CAFFE_ENFORCE(!net_name_.empty(), "You need to provide net_name");
auto* net = ws_->GetNet(net_name_);
auto prof_dag_net = dynamic_cast_if_rtti<ProfDAGNet*>(net);
CAFFE_ENFORCE(prof_dag_net);
auto stats = prof_dag_net->GetOperatorStats();
// Write protobuf message to the output blob
std::string serialized_data;
CAFFE_ENFORCE(stats.SerializeToString(&serialized_data));
Output(0)->Resize(1);
Output(0)->template mutable_data<std::string>()[0] = serialized_data;
return true;
}
protected:
std::string net_name_;
Workspace* ws_;
};
} // namespace caffe2
#endif // CAFFE2_CONTRIB_PROF_PROF_DAG_STATS_OP_H_

View File

@ -0,0 +1,26 @@
syntax = "proto2";
package caffe2;
// A few notes about the Caffe2's protobuffer convention:
// (1) Most objects are registered by their types, such as operators and nets.
// For these, we have a string-type field "type" for registration purposes.
// (2) We do not use extension because that used to create quite some conflicts
// in Caffe's protobuf design.
// (3) We have not used any proto3 specific features, such as Any or Map. This
// is mainly for backward compatibility purposes but we may consider using
// those in the future.
// Protobuf format to serialize profiler data
message ProfDAGProto {
// The name for the operator
required string name = 1;
// The mean execution time
required float mean = 2;
// The standard deviation
required float stddev = 3;
}
message ProfDAGProtos {
repeated ProfDAGProto stats = 1;
}