mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Lightweight logging for once-only API usage
This commit is contained in:
@ -39,6 +39,7 @@ namespace detail {
|
||||
// compilation unit (alt is to have another method in hooks, but
|
||||
// let's not if we don't need to!)
|
||||
std::unique_ptr<THCState, void (*)(THCState*)> CUDAHooks::initCUDA() const {
|
||||
C10_LOG_API_USAGE_ONCE("aten.init.cuda");
|
||||
THCState* thc_state = THCState_alloc();
|
||||
|
||||
THCudaInit(thc_state);
|
||||
|
@ -48,8 +48,12 @@ TensorImpl::TensorImpl(Storage&& storage, TensorTypeId type_id, const caffe2::Ty
|
||||
data_type_(data_type),
|
||||
device_opt_(device_opt),
|
||||
type_id_(type_id) {
|
||||
AT_ASSERT(type_id == UndefinedTensorId() || data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
|
||||
device_opt_.has_value());
|
||||
if (type_id != UndefinedTensorId()) {
|
||||
AT_ASSERT(data_type.id() == caffe2::TypeIdentifier::uninitialized() ||
|
||||
device_opt_.has_value());
|
||||
// UndefinedTensorImpl is a singleton, so we skip logging it
|
||||
C10_LOG_API_USAGE_ONCE("tensor.create");
|
||||
}
|
||||
// we would also like to check that non-cpu devices have an index, but some Caffe2 operators create
|
||||
// Storages with default devices.
|
||||
strides_.push_back(1);
|
||||
|
@ -29,10 +29,21 @@
|
||||
classname(const classname&) = delete; \
|
||||
classname& operator=(const classname&) = delete
|
||||
|
||||
#define CONCAT_IMPL(x, y) x##y
|
||||
#define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y)
|
||||
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
|
||||
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
|
||||
|
||||
#define C10_MACRO_EXPAND(args) args
|
||||
|
||||
/**
|
||||
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
|
||||
* str and ending with a number that varies with the line.
|
||||
*/
|
||||
#ifdef __COUNTER__
|
||||
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
|
||||
#else
|
||||
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
|
||||
#endif
|
||||
|
||||
#define MACRO_EXPAND(args) args
|
||||
|
||||
/// C10_NODISCARD - Warn if a type or return value is discarded.
|
||||
|
||||
|
@ -1,9 +1,10 @@
|
||||
#include "c10/util/Logging.h"
|
||||
#include "c10/util/Flags.h"
|
||||
#include "c10/util/Backtrace.h"
|
||||
#include "c10/util/Flags.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
|
||||
@ -24,7 +25,9 @@ namespace enforce_detail {
|
||||
|
||||
namespace {
|
||||
std::function<string(void)>* GetFetchStackTrace() {
|
||||
static std::function<string(void)> func = []() { return get_backtrace(/*frames_to_skip=*/ 1); };
|
||||
static std::function<string(void)> func = []() {
|
||||
return get_backtrace(/*frames_to_skip=*/1);
|
||||
};
|
||||
return &func;
|
||||
};
|
||||
} // namespace
|
||||
@ -49,12 +52,43 @@ void ThrowEnforceNotMet(
|
||||
// PyTorch-style error message
|
||||
// (This must be defined here for access to GetFetchStackTrace)
|
||||
Error::Error(SourceLocation source_location, const std::string& msg)
|
||||
: Error(
|
||||
msg,
|
||||
str(" (",
|
||||
source_location,
|
||||
")\n",
|
||||
(*GetFetchStackTrace())())) {}
|
||||
: Error(msg, str(" (", source_location, ")\n", (*GetFetchStackTrace())())) {
|
||||
}
|
||||
|
||||
using APIUsageLoggerType = std::function<void(const std::string&)>;
|
||||
|
||||
namespace {
|
||||
bool IsAPIUsageDebugMode() {
|
||||
return getenv("PYTORCH_API_USAGE_STDERR");
|
||||
}
|
||||
|
||||
void APIUsageDebug(const string& event) {
|
||||
// use stderr to avoid messing with glog
|
||||
std::cerr << "PYTORCH_API_USAGE " << event << std::endl;
|
||||
}
|
||||
|
||||
APIUsageLoggerType* GetAPIUsageLogger() {
|
||||
static APIUsageLoggerType func =
|
||||
IsAPIUsageDebugMode() ? &APIUsageDebug : [](const string&) {};
|
||||
return &func;
|
||||
};
|
||||
} // namespace
|
||||
|
||||
void SetAPIUsageLogger(std::function<void(const std::string&)> logger) {
|
||||
AT_ASSERT(logger);
|
||||
*GetAPIUsageLogger() = logger;
|
||||
}
|
||||
|
||||
void LogAPIUsage(const std::string& event) {
|
||||
(*GetAPIUsageLogger())(event);
|
||||
}
|
||||
|
||||
namespace detail {
|
||||
bool LogAPIUsageFakeReturn(const std::string& event) {
|
||||
(*GetAPIUsageLogger())(event);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace c10
|
||||
|
||||
|
@ -257,6 +257,32 @@ BINARY_COMP_HELPER(LessEquals, <=)
|
||||
#define CAFFE_ENFORCE_GT_WITH_CALLER(x, y, ...) \
|
||||
CAFFE_ENFORCE_THAT_IMPL_WITH_CALLER( \
|
||||
Greater((x), (y)), #x " > " #y, __VA_ARGS__)
|
||||
|
||||
/**
|
||||
* Very lightweight logging for the first time API usage. It's beneficial for
|
||||
* tracking of individual functionality usage in larger applications.
|
||||
*
|
||||
* In order to ensure light-weightness of logging, we utilize static variable
|
||||
* trick - LogAPIUsage will be invoked only once and further invocations will
|
||||
* just do an atomic check.
|
||||
*
|
||||
* Example:
|
||||
* // Logs caller info with an arbitrary text event, if there is a usage.
|
||||
* C10_LOG_API_USAGE_ONCE("my_api");
|
||||
*/
|
||||
#define C10_LOG_API_USAGE_ONCE(...) \
|
||||
C10_UNUSED static bool C10_ANONYMOUS_VARIABLE(logFlag) = \
|
||||
::c10::detail::LogAPIUsageFakeReturn(__VA_ARGS__);
|
||||
|
||||
// API usage logging capabilities
|
||||
C10_API void SetAPIUsageLogger(std::function<void(const std::string&)> logger);
|
||||
C10_API void LogAPIUsage(const std::string& context);
|
||||
|
||||
namespace detail {
|
||||
// Return value is needed to do the static variable initialization trick
|
||||
C10_API bool LogAPIUsageFakeReturn(const std::string& context);
|
||||
}
|
||||
|
||||
} // namespace c10
|
||||
|
||||
#endif // C10_UTIL_LOGGING_H_
|
||||
|
@ -185,18 +185,6 @@ class Registerer {
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* C10_ANONYMOUS_VARIABLE(str) introduces an identifier starting with
|
||||
* str and ending with a number that varies with the line.
|
||||
*/
|
||||
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2
|
||||
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2)
|
||||
#ifdef __COUNTER__
|
||||
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__)
|
||||
#else
|
||||
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__)
|
||||
#endif
|
||||
|
||||
/**
|
||||
* C10_DECLARE_TYPED_REGISTRY is a macro that expands to a function
|
||||
* declaration, as well as creating a convenient typename for its corresponding
|
||||
|
@ -23,10 +23,10 @@
|
||||
#include <c10/util/Exception.h>
|
||||
#include <c10/util/Half.h>
|
||||
#include <c10/util/IdWrapper.h>
|
||||
#include <c10/util/Type.h>
|
||||
#include <c10/util/qint32.h>
|
||||
#include <c10/util/qint8.h>
|
||||
#include <c10/util/quint8.h>
|
||||
#include <c10/util/qint32.h>
|
||||
#include <c10/util/Type.h>
|
||||
|
||||
/*
|
||||
* TypeIdentifier is a small type containing an id.
|
||||
@ -498,15 +498,15 @@ inline std::ostream& operator<<(
|
||||
#define EXPORT_IF_NOT_GCC
|
||||
#endif
|
||||
|
||||
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
|
||||
namespace detail { \
|
||||
const TypeMetaData MACRO_CONCAT(_typeMetaDataInstance_, Counter) = \
|
||||
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
|
||||
} \
|
||||
template <> \
|
||||
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
|
||||
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
|
||||
return &MACRO_CONCAT(detail::_typeMetaDataInstance_, Counter); \
|
||||
#define _CAFFE_KNOWN_TYPE_DEFINE_TYPEMETADATA_INSTANCE(T, Counter) \
|
||||
namespace detail { \
|
||||
const TypeMetaData C10_CONCATENATE(_typeMetaDataInstance_, Counter) = \
|
||||
_makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
|
||||
} \
|
||||
template <> \
|
||||
EXPORT_IF_NOT_GCC const detail::TypeMetaData* \
|
||||
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
|
||||
return &C10_CONCATENATE(detail::_typeMetaDataInstance_, Counter); \
|
||||
}
|
||||
#define CAFFE_KNOWN_TYPE(T) \
|
||||
template <> \
|
||||
@ -529,20 +529,20 @@ inline std::ostream& operator<<(
|
||||
return TypeIdentifier(PreallocatedId); \
|
||||
} \
|
||||
namespace detail { \
|
||||
C10_API extern const TypeMetaData MACRO_CONCAT( \
|
||||
C10_API extern const TypeMetaData C10_CONCATENATE( \
|
||||
_typeMetaDataInstance_preallocated_, \
|
||||
PreallocatedId); \
|
||||
}
|
||||
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
|
||||
namespace detail { \
|
||||
C10_EXPORT const TypeMetaData MACRO_CONCAT( \
|
||||
C10_EXPORT const TypeMetaData C10_CONCATENATE( \
|
||||
_typeMetaDataInstance_preallocated_, \
|
||||
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
|
||||
} \
|
||||
template <> \
|
||||
C10_EXPORT const detail::TypeMetaData* \
|
||||
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
|
||||
return &MACRO_CONCAT( \
|
||||
return &C10_CONCATENATE( \
|
||||
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
|
||||
}
|
||||
#else // _MSC_VER
|
||||
@ -552,19 +552,19 @@ inline std::ostream& operator<<(
|
||||
return TypeIdentifier(PreallocatedId); \
|
||||
} \
|
||||
namespace detail { \
|
||||
C10_EXPORT extern const TypeMetaData MACRO_CONCAT( \
|
||||
C10_EXPORT extern const TypeMetaData C10_CONCATENATE( \
|
||||
_typeMetaDataInstance_preallocated_, \
|
||||
PreallocatedId); \
|
||||
} \
|
||||
template <> \
|
||||
inline const detail::TypeMetaData* \
|
||||
TypeMeta::_typeMetaDataInstance<T>() noexcept { \
|
||||
return &MACRO_CONCAT( \
|
||||
return &C10_CONCATENATE( \
|
||||
detail::_typeMetaDataInstance_preallocated_, PreallocatedId); \
|
||||
}
|
||||
#define CAFFE_DEFINE_PREALLOCATED_KNOWN_TYPE(PreallocatedId, T) \
|
||||
namespace detail { \
|
||||
const TypeMetaData MACRO_CONCAT( \
|
||||
const TypeMetaData C10_CONCATENATE( \
|
||||
_typeMetaDataInstance_preallocated_, \
|
||||
PreallocatedId) = _makeTypeMetaDataInstance<T>(_typeName<T>(#T)); \
|
||||
}
|
||||
|
@ -201,6 +201,7 @@ static void Caffe2InitializeCuda() {
|
||||
VLOG(1) << "No cuda gpu present. Skipping.";
|
||||
return;
|
||||
}
|
||||
C10_LOG_API_USAGE_ONCE("caffe2.init.cuda");
|
||||
// Check if the number of GPUs matches the expected compile-time max number
|
||||
// of GPUs.
|
||||
CAFFE_ENFORCE_LE(
|
||||
|
@ -42,6 +42,7 @@ bool GlobalInitAlreadyRun() {
|
||||
}
|
||||
|
||||
bool GlobalInit(int* pargc, char*** pargv) {
|
||||
C10_LOG_API_USAGE_ONCE("caffe2.global_init");
|
||||
static std::recursive_mutex init_mutex;
|
||||
std::lock_guard<std::recursive_mutex> guard(init_mutex);
|
||||
internal::State& init_state = internal::GlobalInitState();
|
||||
|
@ -37,6 +37,7 @@ NetBase::NetBase(
|
||||
name_(def->name()),
|
||||
net_def_(def) {
|
||||
static GlobalInitIsCalledGuard guard;
|
||||
C10_LOG_API_USAGE_ONCE("caffe2.net.create");
|
||||
// Check that node_name is empty for all ops
|
||||
for (const OperatorDef& op : def->op()) {
|
||||
if (op.has_device_option()) {
|
||||
|
@ -386,6 +386,7 @@ C10_DEFINE_REGISTRY(
|
||||
|
||||
GradientOpsMeta GetGradientForOp(
|
||||
const OperatorDef& def, const vector<GradientWrapper>& g_output) {
|
||||
C10_LOG_API_USAGE_ONCE("caffe2.gradient_maker");
|
||||
std::unique_ptr<GradientMakerBase> maker(
|
||||
GradientRegistry()->Create(def.type(), def, g_output));
|
||||
CAFFE_ENFORCE(maker,
|
||||
|
@ -1267,7 +1267,7 @@ C10_DECLARE_REGISTRY(
|
||||
#define REGISTER_CPU_GRADIENT_OPERATOR(...) /* No gradients. */
|
||||
#else
|
||||
#define REGISTER_CPU_GRADIENT_OPERATOR(...) \
|
||||
MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__))
|
||||
C10_MACRO_EXPAND(REGISTER_CPU_OPERATOR(__VA_ARGS__))
|
||||
#endif
|
||||
|
||||
C10_DECLARE_REGISTRY(
|
||||
|
@ -1819,6 +1819,8 @@ void addGlobalMethods(py::module& m) {
|
||||
PYBIND11_MODULE(caffe2_pybind11_state, m) {
|
||||
m.doc() = "pybind11 stateful interface to Caffe2 workspaces";
|
||||
|
||||
C10_LOG_API_USAGE_ONCE("caffe2.python.import");
|
||||
|
||||
addGlobalMethods(m);
|
||||
addObjectMethods(m);
|
||||
for (const auto& addition : PybindAdditionRegistry()->Keys()) {
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <cstdlib>
|
||||
#include <libshm.h>
|
||||
#include <TH/TH.h>
|
||||
#include <c10/util/Logging.h>
|
||||
#include <ATen/ATen.h>
|
||||
#include <ATen/ExpandUtils.h>
|
||||
#include <ATen/dlpack.h>
|
||||
@ -565,6 +566,16 @@ static void warning_handler(
|
||||
}
|
||||
}
|
||||
|
||||
// In Python we can't use the trick of C10_LOG_API_USAGE_ONCE
|
||||
// Guaranteed to be invoked from Python under GIL, no locking on map needed
|
||||
static void LogAPIUsageOnceFromPython(const std::string& event) {
|
||||
static std::unordered_set<std::string> seen;
|
||||
if (!seen.count(event)) {
|
||||
seen.insert(event);
|
||||
c10::LogAPIUsage(event);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
__declspec(dllexport)
|
||||
@ -573,6 +584,8 @@ PyObject* initModule() {
|
||||
HANDLE_TH_ERRORS
|
||||
at::init_num_threads();
|
||||
|
||||
C10_LOG_API_USAGE_ONCE("torch.python.import");
|
||||
|
||||
#define ASSERT_TRUE(cmd) if (!(cmd)) return nullptr
|
||||
|
||||
THPUtils_addPyMethodDefs(methods, TorchMethods);
|
||||
@ -675,7 +688,9 @@ PyObject* initModule() {
|
||||
// setting up TH Errors so that they throw C++ exceptions
|
||||
at::init();
|
||||
|
||||
py::reinterpret_borrow<py::module>(module).def("_demangle", &c10::demangle);
|
||||
auto py_module = py::reinterpret_borrow<py::module>(module);
|
||||
py_module.def("_demangle", &c10::demangle);
|
||||
py_module.def("_log_api_usage_once", &LogAPIUsageOnceFromPython);
|
||||
|
||||
// Set ATen warnings to issue Python warnings
|
||||
::c10::Warning::set_warning_handler(&warning_handler);
|
||||
|
@ -36,6 +36,7 @@ template <typename T>
|
||||
using shared_ptr_class_ = py::class_<T, std::shared_ptr<T>>;
|
||||
|
||||
PyObject* c10d_init(PyObject* _unused) {
|
||||
C10_LOG_API_USAGE_ONCE("c10d.python.import");
|
||||
auto c10d_module = THPObjectPtr(PyImport_ImportModule("torch.distributed"));
|
||||
if (!c10d_module) {
|
||||
throw python_error();
|
||||
|
@ -550,6 +550,7 @@ ScriptModuleSerializer::ScriptModuleSerializer(std::ostream* ofs)
|
||||
void ScriptModuleSerializer::serialize(
|
||||
const script::Module& module,
|
||||
const script::ExtraFilesMap& extra_files) {
|
||||
C10_LOG_API_USAGE_ONCE("torch.script.save");
|
||||
torch::ModelDef model_def;
|
||||
convertModel(module, &model_def, extra_files);
|
||||
std::string output;
|
||||
|
@ -488,6 +488,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
|
||||
" inputs, but got only ",
|
||||
stack.size());
|
||||
|
||||
C10_LOG_API_USAGE_ONCE("torch.graph_executor.run");
|
||||
logging::getLogger()->addStatValue(
|
||||
logging::runtime_counters::GRAPH_EXECUTOR_INVOCATIONS, 1.0);
|
||||
|
||||
@ -579,7 +580,7 @@ struct GraphExecutorImpl : public GraphExecutorImplBase {
|
||||
for (Node* dnode : diff_nodes) {
|
||||
auto diff_graph = std::move(dnode->g(attr::Subgraph));
|
||||
Gradient gradient = differentiate(diff_graph);
|
||||
// Run post differentiation optimizations, Autodiff will replace some
|
||||
// Run post differentiation optimizations, Autodiff will replace some
|
||||
// parts of graph with new graph, these new graphs usually consists of
|
||||
// control flows and miss shape information on nodes, so we run shape
|
||||
// prop and differentiable optimizations to ensure the graph is optimized
|
||||
|
@ -95,6 +95,7 @@ void ScriptModuleDeserializer::deserialize(
|
||||
script::ModuleLookup module_lookup,
|
||||
c10::optional<at::Device> device,
|
||||
script::ExtraFilesMap& extra_files) {
|
||||
C10_LOG_API_USAGE_ONCE("torch.script.load");
|
||||
torch::ModelDef model_def;
|
||||
at::DataPtr data_ptr;
|
||||
size_t data_size;
|
||||
|
@ -42,6 +42,8 @@ std::shared_ptr<torch::jit::Graph> createGraphByTracing(
|
||||
const py::function& var_name_lookup_fn,
|
||||
bool force_outplace,
|
||||
const std::shared_ptr<script::Module>& self) {
|
||||
C10_LOG_API_USAGE_ONCE("torch.tracer");
|
||||
|
||||
auto enter_info = tracer::enter(std::move(trace_inputs), self);
|
||||
auto graph = enter_info.first->graph;
|
||||
|
||||
|
@ -531,6 +531,7 @@ void initJitScriptBindings(PyObject* module) {
|
||||
m.def(
|
||||
"_jit_script_compile",
|
||||
[](const Def& def, ResolutionCallback rcb, FunctionDefaults defaults) {
|
||||
C10_LOG_API_USAGE_ONCE("torch.script.compile");
|
||||
CompilationUnit cu;
|
||||
cu.define({def}, {pythonResolver(rcb)}, nullptr);
|
||||
std::shared_ptr<Function> defined = cu.get_functions().at(0);
|
||||
@ -561,6 +562,7 @@ void initJitScriptBindings(PyObject* module) {
|
||||
[](const std::string& qualifiedName,
|
||||
const ClassDef& classDef,
|
||||
ResolutionCallback rcb) {
|
||||
C10_LOG_API_USAGE_ONCE("torch.script.class");
|
||||
auto cu = std::make_shared<CompilationUnit>();
|
||||
auto classType =
|
||||
ClassType::create(c10::QualifiedName(qualifiedName), cu);
|
||||
|
@ -1,5 +1,7 @@
|
||||
#include <c10d/ProcessGroup.hpp>
|
||||
|
||||
#include <c10/util/Logging.h>
|
||||
|
||||
namespace c10d {
|
||||
|
||||
ProcessGroup::Work::~Work() {}
|
||||
@ -44,7 +46,9 @@ void ProcessGroup::Work::finish(std::exception_ptr exception) {
|
||||
cv_.notify_all();
|
||||
}
|
||||
|
||||
ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {}
|
||||
ProcessGroup::ProcessGroup(int rank, int size) : rank_(rank), size_(size) {
|
||||
C10_LOG_API_USAGE_ONCE("c10d.process_group");
|
||||
}
|
||||
|
||||
ProcessGroup::~ProcessGroup() {}
|
||||
|
||||
|
@ -63,6 +63,7 @@ class Module(object):
|
||||
_version = 1
|
||||
|
||||
def __init__(self):
|
||||
torch._C._log_api_usage_once("python.nn_module")
|
||||
self._backend = thnn_backend
|
||||
self._parameters = OrderedDict()
|
||||
self._buffers = OrderedDict()
|
||||
|
@ -30,6 +30,7 @@ class Optimizer(object):
|
||||
"""
|
||||
|
||||
def __init__(self, params, defaults):
|
||||
torch._C._log_api_usage_once("python.optimizer")
|
||||
self.defaults = defaults
|
||||
|
||||
if isinstance(params, torch.Tensor):
|
||||
|
@ -142,6 +142,7 @@ class DataLoader(object):
|
||||
batch_sampler=None, num_workers=0, collate_fn=default_collate,
|
||||
pin_memory=False, drop_last=False, timeout=0,
|
||||
worker_init_fn=None):
|
||||
torch._C._log_api_usage_once("python.data_loader")
|
||||
self.dataset = dataset
|
||||
self.batch_size = batch_size
|
||||
self.num_workers = num_workers
|
||||
|
Reference in New Issue
Block a user