[pytorch][monitoring] Dynamic backend for WaitCounter (#135967)

Summary: This implements a default backend proxy that tries to look up a backend implementation via dlsym. This makes it possible to dynamically load a module that provides a backend implementation, without statically linking that module into the application.

Differential Revision: D62549295

Pull Request resolved: https://github.com/pytorch/pytorch/pull/135967
Approved by: https://github.com/c-p-i-o
This commit is contained in:
Andrii Grynenko
2024-09-15 18:07:49 +00:00
committed by PyTorch MergeBot
parent dec3403b24
commit a141c6bb0d
5 changed files with 95 additions and 0 deletions

View File

@ -127,6 +127,7 @@ if(NOT BUILD_LIBTORCHLESS)
if(LINUX)
  # c10 needs the threading library, and libdl for the dlsym() lookup that
  # WaitCounter uses to discover a dynamically loaded backend.
  target_link_libraries(c10 PRIVATE Threads::Threads dl)
endif()
if(ANDROID)

View File

@ -1,6 +1,7 @@
#include <c10/util/WaitCounter.h>
#include <c10/util/Synchronized.h>
#include <c10/util/WaitCounterDynamicBackend.h>
#include <chrono>
#include <memory>
@ -8,6 +9,10 @@
#include <unordered_map>
#include <vector>
#ifndef _WIN32
#include <dlfcn.h>
#endif
namespace c10::monitor {
namespace detail {
@ -19,6 +24,58 @@ Synchronized<WaitCounterBackendFactories>& waitCounterBackendFactories() {
static auto instance = new Synchronized<WaitCounterBackendFactories>();
return *instance;
}
// Adapts a C-style WaitCounterDynamicBackend (an opaque `self` pointer plus a
// table of function pointers) to the WaitCounterBackendIf interface.
//
// The wrapper owns `impl.self`: it is handed to `impl.destroy` exactly once,
// from the destructor. Copy and move are therefore disabled -- a second
// wrapper holding the same `self` would destroy it a second time.
class DynamicBackendWrapper : public WaitCounterBackendIf {
 public:
  // Takes over the backend described by `impl`. The caller is expected to
  // pass a fully populated table (getDynamicBackend only checks `self`).
  explicit DynamicBackendWrapper(WaitCounterDynamicBackend impl)
      : impl_{impl} {}

  DynamicBackendWrapper(const DynamicBackendWrapper&) = delete;
  DynamicBackendWrapper(DynamicBackendWrapper&&) = delete;
  DynamicBackendWrapper& operator=(const DynamicBackendWrapper&) = delete;
  DynamicBackendWrapper& operator=(DynamicBackendWrapper&&) = delete;

  ~DynamicBackendWrapper() override {
    impl_.destroy(impl_.self);
  }

  // Forwards to the backend's `start` callback and returns the context value
  // it produces; that value is later passed back to stop().
  intptr_t start(std::chrono::steady_clock::time_point now) noexcept override {
    return impl_.start(impl_.self, toMicros(now));
  }

  // Forwards to the backend's `stop` callback with the context from start().
  void stop(std::chrono::steady_clock::time_point now, intptr_t ctx) noexcept
      override {
    impl_.stop(impl_.self, toMicros(now), ctx);
  }

 private:
  // The dynamic backend API takes timestamps as integer microseconds since
  // the steady_clock epoch rather than as a chrono time_point.
  static int64_t toMicros(std::chrono::steady_clock::time_point tp) noexcept {
    return std::chrono::duration_cast<std::chrono::microseconds>(
               tp.time_since_epoch())
        .count();
  }

  WaitCounterDynamicBackend impl_;
};
// Tries to create a backend for `key` through a dynamically discovered init
// function.
//
// The init function is looked up with dlsym(RTLD_DEFAULT, ...) under the name
// kWaitCounterDynamicBackendInitFn; on Windows no lookup is attempted.
// Returns nullptr when the symbol is not found or when the init function
// leaves `self` unset, otherwise a wrapper that owns the created backend.
std::unique_ptr<WaitCounterBackendIf> getDynamicBackend(std::string_view key) {
  // Function-local static: the symbol lookup runs once, on first call, and
  // the result (including "not found") is reused afterwards.
  static auto dynamicBackendInit =
      reinterpret_cast<WaitCounterDynamicBackendInit>([]() -> void* {
#ifndef _WIN32
        // Copy into std::string to obtain a NUL-terminated name for dlsym;
        // a string_view does not guarantee termination by itself.
        return dlsym(
            RTLD_DEFAULT,
            std::string(kWaitCounterDynamicBackendInitFn).c_str());
#else
        return nullptr;
#endif
      }());
  if (!dynamicBackendInit) {
    return nullptr;
  }

  WaitCounterDynamicBackend backend;
  // Use data() rather than &key[0]: indexing an empty string_view is
  // undefined behaviour, while data() is always valid to call.
  dynamicBackendInit(&backend, key.data(), key.size());
  if (!backend.self) {
    // The dynamic module declined to provide a backend for this key.
    return nullptr;
  }
  return std::make_unique<DynamicBackendWrapper>(backend);
}
} // namespace
class WaitCounterImpl {
@ -70,6 +127,9 @@ class WaitCounterImpl {
backends_.push_back(std::move(backend));
}
}
if (auto backend = getDynamicBackend(key)) {
backends_.push_back(std::move(backend));
}
}
SmallVector<std::unique_ptr<WaitCounterBackendIf>> backends_;
@ -80,6 +140,12 @@ void registerWaitCounterBackend(
waitCounterBackendFactories().withLock(
[&](auto& factories) { factories.push_back(std::move(factory)); });
}
// Returns a copy of the list of registered backend factories. The copy is
// made inside the registry's withLock() callback.
std::vector<std::shared_ptr<WaitCounterBackendFactoryIf>>
getRegisteredWaitCounterBackends() {
  auto& registry = waitCounterBackendFactories();
  return registry.withLock([](auto& registered) {
    auto snapshot = registered;
    return snapshot;
  });
}
} // namespace detail
WaitCounterHandle::WaitCounterHandle(std::string_view key)

View File

@ -36,6 +36,9 @@ class WaitCounterBackendFactoryIf {
C10_API void registerWaitCounterBackend(
std::unique_ptr<WaitCounterBackendFactoryIf>);
C10_API std::vector<std::shared_ptr<WaitCounterBackendFactoryIf>>
getRegisteredWaitCounterBackends();
} // namespace detail
// A handle to a wait counter.

View File

@ -0,0 +1,21 @@
#pragma once
#include <cstdint>
#include <string_view>
namespace c10::monitor::detail {
// Plain function-pointer table describing a wait counter backend that is
// provided by a dynamically loaded module. Every member defaults to null.
struct WaitCounterDynamicBackend {
  // Opaque backend instance; passed back as the first argument of every
  // callback below.
  void* self = nullptr;

  // Called when a wait starts. `nowUs` is a timestamp in microseconds; the
  // returned value is handed back to `stop` as `ctx`.
  intptr_t (*start)(void* self, int64_t nowUs) = nullptr;

  // Called when a wait ends, with the `ctx` produced by the matching `start`.
  void (*stop)(void* self, int64_t nowUs, intptr_t ctx) = nullptr;

  // Releases the backend instance referenced by `self`.
  void (*destroy)(void* self) = nullptr;
};
// Signature of the init function a dynamic backend module exports. It is
// given a default-initialized WaitCounterDynamicBackend to fill in, plus the
// counter key as a pointer/length pair (the key comes from a
// std::string_view, so it is not necessarily NUL-terminated). Leaving `self`
// null means no backend is provided for that key.
using WaitCounterDynamicBackendInit =
void (*)(WaitCounterDynamicBackend*, const char* key, std::size_t keyLen);
// Symbol name under which the init function is looked up via dlsym.
// This name needs to be updated if anything in the API above is changed.
constexpr std::string_view kWaitCounterDynamicBackendInitFn =
"c10_monitor_wait_counter_dynamic_backend_init_v1";
} // namespace c10::monitor::detail

View File

@ -43,6 +43,10 @@ def define_targets(rules):
"//c10:using_glog": ["@com_github_glog//:glog"],
"//conditions:default": [],
}),
linkopts = rules.select({
"@bazel_tools//src/conditions:windows": [],
"//conditions:default": ["-ldl"],
}),
# This library uses flags and registration. Do not let the
# linker remove them.
alwayslink = True,