Lightweight logging for once-only API usage

This commit is contained in:
Dmytro Dzhulgakov
2019-05-19 23:01:09 -07:00
parent 7b9ee598d6
commit d3059b9c49
24 changed files with 146 additions and 46 deletions

View File

@ -39,6 +39,7 @@ namespace detail {
// compilation unit (alt is to have another method in hooks, but
// let's not if we don't need to!)
std::unique_ptr<THCState, void (*)(THCState*)> CUDAHooks::initCUDA() const {
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
THCState* thc_state = THCState_alloc();
THCudaInit(thc_state);

View File

@ -48,8 +48,12 @@ TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::Ty
data_type_(data_type),
device_opt_(device_opt),
type_id_(type_id) {
AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
device_opt_.has_value());
if (type_id != UndefinedTensorId()) {
AT_ASSERT(data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
device_opt_.has_value());
// UndefinedTensorImpl is a singleton, so we skip logging it
C10_LOG_API_USAGE_ONCE("tensor.create");
}
// we would also like to check that non-cpu devices have an index, but some Caffe2 operators create
// Storages with default devices.
strides_.push_back(1);

View File

@ -29,10 +29,21 @@
classname(const classname&) = delete; \
classname& operator=(const classname&) = delete
#define CONCAT_IMPL(x, y) x##y
#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
#define C10_MACRO_EXPAND(args) args
/**
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
* str and ending with a number that varies with the line.
*/
#ifdef __COUNTER__
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
#else
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
#endif
#define MACRO_EXPAND(args) args
/// C10_NODISCARD - Warn if a type or return value is discarded.

View File

@ -1,9 +1,10 @@
#include "c10/util/Logging.h"
#include "c10/util/Flags.h"
#include "c10/util/Backtrace.h"
#include "c10/util/Flags.h"
#include <algorithm>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <numeric>
@ -24,7 +25,9 @@ namespace enforce_detail {
namespace {
std::function<string(void)>* GetFetchStackTrace() {
static std::function<string(void)> func = []() { return get_backtrace(/*frames_to_skip=*/ 1); };
static std::function<string(void)> func = []() {
return get_backtrace(/*frames_to_skip=*/1);
};
return &func;
};
} // namespace
@ -49,12 +52,43 @@ void ThrowEnforceNotMet(
// PyTorch-style error message
// (This must be defined here for access to GetFetchStackTrace)
Error::Error(SourceLocation source_location, const std::string& msg)
: Error(
msg,
str(" (",
source_location,
")\n",
(*GetFetchStackTrace())())) {}
: Error(msg, str(" (", source_location, ")\n", (*GetFetchStackTrace())())) {
}
using APIUsageLoggerType = std::function<void(const std::string&)>;
namespace {
// Returns true when the PYTORCH_API_USAGE_STDERR environment variable is
// set, which switches API-usage logging into stderr debug mode.
bool IsAPIUsageDebugMode() {
  const char* flag = std::getenv("PYTORCH_API_USAGE_STDERR");
  return flag != nullptr;
}
// Debug logger: echoes the event string to stderr. stderr is chosen so the
// output does not interleave with glog's log streams.
void APIUsageDebug(const std::string& event) {
  std::cerr << "PYTORCH_API_USAGE " << event << std::endl;
}
// Returns a pointer to the process-wide API-usage logger. On first use it is
// initialized to the stderr debug logger when PYTORCH_API_USAGE_STDERR is
// set, and to a no-op otherwise; SetAPIUsageLogger overwrites it in place.
APIUsageLoggerType* GetAPIUsageLogger() {
  static APIUsageLoggerType func = IsAPIUsageDebugMode()
      ? APIUsageLoggerType(&APIUsageDebug)
      : APIUsageLoggerType([](const std::string&) {});
  return &func;
}
} // namespace
// Installs a custom process-wide API-usage logger, replacing whichever
// logger was previously installed. The logger must be non-empty.
void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
  AT_ASSERT(logger);
  // `logger` is taken by value, so move it into the global slot instead of
  // copying the std::function's (potentially heap-allocated) state.
  *GetAPIUsageLogger() = std::move(logger);
}
// Forwards a single API-usage event to the currently registered logger
// (no-op by default; see SetAPIUsageLogger / GetAPIUsageLogger).
void LogAPIUsage(const std::string& event) {
(*GetAPIUsageLogger())(event);
}
namespace detail {
// Same as LogAPIUsage, but returns a value so the call can initialize a
// static bool — this is the once-only trick used by C10_LOG_API_USAGE_ONCE:
// the static's initializer runs exactly once, and later executions of the
// macro reduce to an (atomic) initialized-flag check.
bool LogAPIUsageFakeReturn(const std::string& event) {
(*GetAPIUsageLogger())(event);
return true;
}
} // namespace detail
} // namespace c10

View File

@ -257,6 +257,32 @@ BINARY_COMP_HELPER(LessEquals, <=)
#define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \
CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \
Greater((x), (y)), #x " > " #y, __VA_ARGS__)
/**
* Very lightweight logging for first-time API usage. It is useful for
* tracking usage of individual features in larger applications.
*
* To keep logging lightweight, we use the static-variable trick: LogAPIUsage
* is invoked only once, and subsequent invocations perform just an atomic
* check.
*
* Example:
* // Logs caller info with an arbitrary text event, if there is a usage.
* C10_LOG_API_USAGE_ONCE("my_api");
*/
#define C10_LOG_API_USAGE_ONCE(...) \
C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__);
// API usage logging capabilities
C10_API void SetAPIUsageLogger(std::function<void(const std::string&)> logger);
C10_API void LogAPIUsage(const std::string& context);
namespace detail {
// Return value is needed to do the static variable initialization trick
C10_API bool LogAPIUsageFakeReturn(const std::string& context);
}
} // namespace c10
#endif // C10_UTIL_LOGGING_H_

View File

@ -185,18 +185,6 @@ class Registerer {
}
};
/**
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
* str and ending with a number that varies with the line.
*/
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
#ifdef __COUNTER__
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
#else
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
#endif
/**
* C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function
* declaration, as well as creating a convenient typename for its corresponding

View File

@ -23,10 +23,10 @@
#include <c10/util/Exception.h>
#include <c10/util/Half.h>
#include <c10/util/IdWrapper.h>
#include <c10/util/Type.h>
#include <c10/util/qint32.h>
#include <c10/util/qint8.h>
#include <c10/util/quint8.h>
#include <c10/util/qint32.h>
#include <c10/util/Type.h>
/*
* TypeIdentifier is a small type containing an id.
@ -498,15 +498,15 @@ inline std::ostream& operator<<(
#define EXPORT_IF_NOT_GCC
#endif
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
namespace detail { \
const TypeMetaData MACRO_CONCAT(_typeMetaDataInstance_, Counter) = \
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
} \
template <> \
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
return &MACRO_CONCAT(detail::_typeMetaDataInstance_, Counter); \
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
namespace detail { \
const TypeMetaData C10_CONCATENATE(_typeMetaDataInstance_, Counter) = \
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
} \
template <> \
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
return &C10_CONCATENATE(detail::_typeMetaDataInstance_, Counter); \
}
#define CAFFE_KNOWN_TYPE(T) \
template <> \
@ -529,20 +529,20 @@ inline std::ostream& operator<<(
return TypeIdentifier(PreallocatedId); \
} \
namespace detail { \
C10_API extern const TypeMetaData MACRO_CONCAT( \
C10_API extern const TypeMetaData C10_CONCATENATE( \
_typeMetaDataInstance_preallocated_, \
PreallocatedId); \
}
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
namespace detail { \
C10_EXPORT const TypeMetaData MACRO_CONCAT( \
C10_EXPORT const TypeMetaData C10_CONCATENATE( \
_typeMetaDataInstance_preallocated_, \
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
} \
template <> \
C10_EXPORT const detail::TypeMetaData* \
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
return &MACRO_CONCAT( \
return &C10_CONCATENATE( \
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
}
#else // _MSC_VER
@ -552,19 +552,19 @@ inline std::ostream& operator<<(
return TypeIdentifier(PreallocatedId); \
} \
namespace detail { \
C10_EXPORT extern const TypeMetaData MACRO_CONCAT( \
C10_EXPORT extern const TypeMetaData C10_CONCATENATE( \
_typeMetaDataInstance_preallocated_, \
PreallocatedId); \
} \
template <> \
inline const detail::TypeMetaData* \
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
return &MACRO_CONCAT( \
return &C10_CONCATENATE( \
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
}
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
namespace detail { \
const TypeMetaData MACRO_CONCAT( \
const TypeMetaData C10_CONCATENATE( \
_typeMetaDataInstance_preallocated_, \
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
}

View File

@ -201,6 +201,7 @@ static void Caffe2InitializeCuda() {
VLOG(1) << "No cuda gpu present. Skipping.";
return;
}
C10_LOG_API_USAGE_ONCE("caffe2.init.cuda");
// Check if the number of GPUs matches the expected compile-time max number
// of GPUs.
CAFFE_ENFORCE_LE(

View File

@ -42,6 +42,7 @@ bool GlobalInitAlreadyRun() {
}
bool GlobalInit(int* pargc, char*** pargv) {
C10_LOG_API_USAGE_ONCE("caffe2.global_init");
static std::recursive_mutex init_mutex;
std::lock_guard<std::recursive_mutex> guard(init_mutex);
internal::State& init_state = internal::GlobalInitState();

View File

@ -37,6 +37,7 @@ NetBase::NetBase(
name_(def->name()),
net_def_(def) {
static GlobalInitIsCalledGuard guard;
C10_LOG_API_USAGE_ONCE("caffe2.net.create");
// Check that node_name is empty for all ops
for (const OperatorDef& op : def->op()) {
if (op.has_device_option()) {

View File

@ -386,6 +386,7 @@ C10_DEFINE_REGISTRY(
GradientOpsMeta GetGradientForOp(
const OperatorDef& def, const vector<GradientWrapper>& g_output) {
C10_LOG_API_USAGE_ONCE("caffe2.gradient_maker");
std::unique_ptr<GradientMakerBase> maker(
GradientRegistry()->Create(def.type(), def, g_output));
CAFFE_ENFORCE(maker,

View File

@ -1267,7 +1267,7 @@ C10_DECLARE_REGISTRY(
#define REGISTER_CPU_GRADIENT_OPERATOR(...) /* No gradients. */
#else
#define REGISTER_CPU_GRADIENT_OPERATOR(...) \
MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__))
C10_MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__))
#endif
C10_DECLARE_REGISTRY(

View File

@ -1819,6 +1819,8 @@ void addGlobalMethods(py::module& m) {
PYBIND11_MODULE(caffe2_pybind11_state, m) {
m.doc() = "pybind11 stateful interface to Caffe2 workspaces";
C10_LOG_API_USAGE_ONCE("caffe2.python.import");
addGlobalMethods(m);
addObjectMethods(m);
for (const auto& addition : PybindAdditionRegistry()->Keys()) {

View File

@ -9,6 +9,7 @@
#include <cstdlib>
#include <libshm.h>
#include <TH/TH.h>
#include <c10/util/Logging.h>
#include <ATen/ATen.h>
#include <ATen/ExpandUtils.h>
#include <ATen/dlpack.h>
@ -565,6 +566,16 @@ static void warning_handler(
}
}
// In Python we can't use the C10_LOG_API_USAGE_ONCE static-variable trick,
// so deduplicate events through a set instead. Guaranteed to be invoked
// from Python under the GIL, so the set needs no locking.
static void LogAPIUsageOnceFromPython(const std::string& event) {
  static std::unordered_set<std::string> seen;
  // insert() reports via .second whether the element was newly added, so a
  // single hash lookup both tests and records the event (the original
  // count()-then-insert() pattern hashed the key twice).
  if (seen.insert(event).second) {
    c10::LogAPIUsage(event);
  }
}
#ifdef _WIN32
__declspec(dllexport)
@ -573,6 +584,8 @@ PyObject* initModule() {
HANDLE_TH_ERRORS
at::init_num_threads();
C10_LOG_API_USAGE_ONCE("torch.python.import");
#define ASSERT_TRUE(cmd) if (!(cmd)) return nullptr
THPUtils_addPyMethodDefs(methods, TorchMethods);
@ -675,7 +688,9 @@ PyObject* initModule() {
// setting up TH Errors so that they throw C++ exceptions
at::init();
py::reinterpret_borrow<py::module>(module).def("_demangle", &c10::demangle);
auto py_module = py::reinterpret_borrow<py::module>(module);
py_module.def("_demangle", &c10::demangle);
py_module.def("_log_api_usage_once", &LogAPIUsageOnceFromPython);
// Set ATen warnings to issue Python warnings
::c10::Warning::set_warning_handler(&warning_handler);

View File

@ -36,6 +36,7 @@ template <typename T>
using shared_ptr_class_ = py::class_<T, std::shared_ptr<T>>;
PyObject* c10d_init(PyObject* _unused) {
C10_LOG_API_USAGE_ONCE("c10d.python.import");
auto c10d_module = THPObjectPtr(PyImport_ImportModule("torch.distributed"));
if (!c10d_module) {
throw python_error();

View File

@ -550,6 +550,7 @@ ScriptModuleSerializer::ScriptModuleSerializer(std::ostream* ofs)
void ScriptModuleSerializer::serialize(
const script::Module& module,
const script::ExtraFilesMap& extra_files) {
C10_LOG_API_USAGE_ONCE("torch.script.save");
torch::ModelDef model_def;
convertModel(module, &model_def, extra_files);
std::string output;

View File

@ -488,6 +488,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
" inputs, but got only ",
stack.size());
C10_LOG_API_USAGE_ONCE("torch.graph_executor.run");
logging::getLogger()->addStatValue(
logging::runtime_counters::GRAPH_EXECUTOR_INVOCATIONS, 1.0);
@ -579,7 +580,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
for (Node* dnode : diff_nodes) {
auto diff_graph = std::move(dnode->g(attr::Subgraph));
Gradient gradient = differentiate(diff_graph);
// Run post differentiation optimizations, Autodiff will replace some
// Run post differentiation optimizations, Autodiff will replace some
// parts of graph with new graph, these new graphs usually consists of
// control flows and miss shape information on nodes, so we run shape
// prop and differentiable optimizations to ensure the graph is optimized

View File

@ -95,6 +95,7 @@ void ScriptModuleDeserializer::deserialize(
script::ModuleLookup module_lookup,
c10::optional<at::Device> device,
script::ExtraFilesMap& extra_files) {
C10_LOG_API_USAGE_ONCE("torch.script.load");
torch::ModelDef model_def;
at::DataPtr data_ptr;
size_t data_size;

View File

@ -42,6 +42,8 @@ std::shared_ptr<torch::jit::Graph> createGraphByTracing(
const py::function& var_name_lookup_fn,
bool force_outplace,
const std::shared_ptr<script::Module>& self) {
C10_LOG_API_USAGE_ONCE("torch.tracer");
auto enter_info = tracer::enter(std::move(trace_inputs), self);
auto graph = enter_info.first->graph;

View File

@ -531,6 +531,7 @@ void initJitScriptBindings(PyObject* module) {
m.def(
"_jit_script_compile",
[](const Def& def, ResolutionCallback rcb, FunctionDefaults defaults) {
C10_LOG_API_USAGE_ONCE("torch.script.compile");
CompilationUnit cu;
cu.define({def}, {pythonResolver(rcb)}, nullptr);
std::shared_ptr<Function> defined = cu.get_functions().at(0);
@ -561,6 +562,7 @@ void initJitScriptBindings(PyObject* module) {
[](const std::string& qualifiedName,
const ClassDef& classDef,
ResolutionCallback rcb) {
C10_LOG_API_USAGE_ONCE("torch.script.class");
auto cu = std::make_shared<CompilationUnit>();
auto classType =
ClassType::create(c10::QualifiedName(qualifiedName), cu);

View File

@ -1,5 +1,7 @@
#include <c10d/ProcessGroup.hpp>
#include <c10/util/Logging.h>
namespace c10d {
ProcessGroup::Work::~Work() {}
@ -44,7 +46,9 @@ void ProcessGroup::Work::finish(std::exception_ptr exception) {
cv_.notify_all();
}
ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {}
// Stores the caller-supplied rank and world size, and records a one-time
// API-usage event the first time any ProcessGroup is constructed.
ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {
C10_LOG_API_USAGE_ONCE("c10d.process_group");
}
ProcessGroup::~ProcessGroup() {}

View File

@ -63,6 +63,7 @@ class Module(object):
_version = 1
def __init__(self):
torch._C._log_api_usage_once("python.nn_module")
self._backend = thnn_backend
self._parameters = OrderedDict()
self._buffers = OrderedDict()

View File

@ -30,6 +30,7 @@ class Optimizer(object):
"""
def __init__(self, params, defaults):
torch._C._log_api_usage_once("python.optimizer")
self.defaults = defaults
if isinstance(params, torch.Tensor):

View File

@ -142,6 +142,7 @@ class DataLoader(object):
batch_sampler=None, num_workers=0, collate_fn=default_collate,
pin_memory=False, drop_last=False, timeout=0,
worker_init_fn=None):
torch._C._log_api_usage_once("python.data_loader")
self.dataset = dataset
self.batch_size = batch_size
self.num_workers = num_workers