mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[pytorch][monitoring] Dynamic backend for WaitCounter (#135967)
Summary: This implements a default backend proxy that tries to look up a backend implementation via dlsym. This makes it possible to load a module containing a backend implementation dynamically, without statically linking it into the application.
Differential Revision: D62549295
Pull Request resolved: https://github.com/pytorch/pytorch/pull/135967
Approved by: https://github.com/c-p-i-o
This commit is contained in:
committed by
PyTorch MergeBot
parent
dec3403b24
commit
a141c6bb0d
@ -127,6 +127,7 @@ if(NOT BUILD_LIBTORCHLESS)
|
||||
|
||||
if(LINUX)
  target_link_libraries(c10 PRIVATE Threads::Threads)
  # c10 calls dlsym(); CMAKE_DL_LIBS names the library that provides
  # dlopen/dlsym on the current platform instead of hard-coding "dl".
  target_link_libraries(c10 PRIVATE ${CMAKE_DL_LIBS})
endif()
|
||||
|
||||
if(ANDROID)
|
||||
|
@ -1,6 +1,7 @@
|
||||
#include <c10/util/WaitCounter.h>
|
||||
|
||||
#include <c10/util/Synchronized.h>
|
||||
#include <c10/util/WaitCounterDynamicBackend.h>
|
||||
|
||||
#include <chrono>
|
||||
#include <memory>
|
||||
@ -8,6 +9,10 @@
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
||||
namespace c10::monitor {
|
||||
|
||||
namespace detail {
|
||||
@ -19,6 +24,58 @@ Synchronized<WaitCounterBackendFactories>& waitCounterBackendFactories() {
|
||||
static auto instance = new Synchronized<WaitCounterBackendFactories>();
|
||||
return *instance;
|
||||
}
|
||||
|
||||
class DynamicBackendWrapper : public WaitCounterBackendIf {
|
||||
public:
|
||||
explicit DynamicBackendWrapper(WaitCounterDynamicBackend impl)
|
||||
: impl_{impl} {}
|
||||
~DynamicBackendWrapper() override {
|
||||
impl_.destroy(impl_.self);
|
||||
}
|
||||
|
||||
intptr_t start(std::chrono::steady_clock::time_point now) noexcept override {
|
||||
return impl_.start(
|
||||
impl_.self,
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch())
|
||||
.count());
|
||||
}
|
||||
|
||||
void stop(std::chrono::steady_clock::time_point now, intptr_t ctx) noexcept
|
||||
override {
|
||||
return impl_.stop(
|
||||
impl_.self,
|
||||
std::chrono::duration_cast<std::chrono::microseconds>(
|
||||
now.time_since_epoch())
|
||||
.count(),
|
||||
ctx);
|
||||
}
|
||||
|
||||
private:
|
||||
WaitCounterDynamicBackend impl_;
|
||||
};
|
||||
|
||||
std::unique_ptr<WaitCounterBackendIf> getDynamicBackend(std::string_view key) {
|
||||
static auto dynamicBackendInit =
|
||||
reinterpret_cast<WaitCounterDynamicBackendInit>([]() -> void* {
|
||||
#ifndef _WIN32
|
||||
return dlsym(
|
||||
RTLD_DEFAULT,
|
||||
std::string(kWaitCounterDynamicBackendInitFn).c_str());
|
||||
#else
|
||||
return nullptr;
|
||||
#endif
|
||||
}());
|
||||
if (!dynamicBackendInit) {
|
||||
return nullptr;
|
||||
}
|
||||
WaitCounterDynamicBackend backend;
|
||||
dynamicBackendInit(&backend, &key[0], key.size());
|
||||
if (!backend.self) {
|
||||
return nullptr;
|
||||
}
|
||||
return std::make_unique<DynamicBackendWrapper>(backend);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
class WaitCounterImpl {
|
||||
@ -70,6 +127,9 @@ class WaitCounterImpl {
|
||||
backends_.push_back(std::move(backend));
|
||||
}
|
||||
}
|
||||
if (auto backend = getDynamicBackend(key)) {
|
||||
backends_.push_back(std::move(backend));
|
||||
}
|
||||
}
|
||||
|
||||
SmallVector<std::unique_ptr<WaitCounterBackendIf>> backends_;
|
||||
@ -80,6 +140,12 @@ void registerWaitCounterBackend(
|
||||
waitCounterBackendFactories().withLock(
|
||||
[&](auto& factories) { factories.push_back(std::move(factory)); });
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<WaitCounterBackendFactoryIf>>
|
||||
getRegisteredWaitCounterBackends() {
|
||||
return waitCounterBackendFactories().withLock(
|
||||
[](auto& factories) { return factories; });
|
||||
}
|
||||
} // namespace detail
|
||||
|
||||
WaitCounterHandle::WaitCounterHandle(std::string_view key)
|
||||
|
@ -36,6 +36,9 @@ class WaitCounterBackendFactoryIf {
|
||||
|
||||
C10_API void registerWaitCounterBackend(
|
||||
std::unique_ptr<WaitCounterBackendFactoryIf>);
|
||||
|
||||
C10_API std::vector<std::shared_ptr<WaitCounterBackendFactoryIf>>
|
||||
getRegisteredWaitCounterBackends();
|
||||
} // namespace detail
|
||||
|
||||
// A handle to a wait counter.
|
||||
|
21
c10/util/WaitCounterDynamicBackend.h
Normal file
21
c10/util/WaitCounterDynamicBackend.h
Normal file
@ -0,0 +1,21 @@
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string_view>
|
||||
|
||||
namespace c10::monitor::detail {
|
||||
|
||||
// Plain function table describing a wait-counter backend provided by a
// dynamically loaded module. All members default to nullptr.
struct WaitCounterDynamicBackend {
  // Opaque backend instance, handed back as the first argument of every
  // callback below.
  void* self = nullptr;
  // Receives a timestamp in microseconds and returns an opaque value
  // (presumably the `ctx` later given to `stop` -- confirm against caller).
  intptr_t (*start)(void* self, int64_t nowUs) = nullptr;
  // Receives a timestamp in microseconds together with a `ctx` value.
  void (*stop)(void* self, int64_t nowUs, intptr_t ctx) = nullptr;
  // Releases the backend instance referred to by `self`.
  void (*destroy)(void* self) = nullptr;
};
|
||||
|
||||
using WaitCounterDynamicBackendInit =
|
||||
void (*)(WaitCounterDynamicBackend*, const char* key, std::size_t keyLen);
|
||||
|
||||
// This name needs to be updated if anything in the API above is changed.
|
||||
constexpr std::string_view kWaitCounterDynamicBackendInitFn =
|
||||
"c10_monitor_wait_counter_dynamic_backend_init_v1";
|
||||
} // namespace c10::monitor::detail
|
@ -43,6 +43,10 @@ def define_targets(rules):
|
||||
"//c10:using_glog": ["@com_github_glog//:glog"],
|
||||
"//conditions:default": [],
|
||||
}),
|
||||
linkopts = rules.select({
|
||||
"@bazel_tools//src/conditions:windows": [],
|
||||
"//conditions:default": ["-ldl"],
|
||||
}),
|
||||
# This library uses flags and registration. Do not let the
|
||||
# linker remove them.
|
||||
alwayslink = True,
|
||||
|
Reference in New Issue
Block a user