Mirror of https://github.com/pytorch/pytorch.git
init kineto after torch module initialized (#131448)
Fixes #131020

As discussed in the issue thread, we can use `KINETO_DAEMON_INIT_DELAY_S` to delay the initialization of `kineto` in case `kineto` is initialized before `libtorch_cuda.so`. However, it is not clear how to choose a proper value for the environment variable `KINETO_DAEMON_INIT_DELAY_S`, so this PR uses a different trick: defer the initialization of `kineto` until after the `torch` module itself has been initialized. I'm not sure whether this is an acceptable trick; please take a look at this PR, thanks.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/131448
Approved by: https://github.com/sraikund16, https://github.com/briancoutinho
Committed by: PyTorch MergeBot. Parent: ccaa2a206a. Commit: c934ed6567.
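For context, the delay-based workaround discussed in the issue thread looks roughly like the sketch below. It is illustrative only; the three-second value is arbitrary and not taken from this PR.

import os

# Sketch of the pre-PR workaround from #131020: ask kineto's daemon-mode
# initialization to wait, hoping libtorch_cuda.so finishes loading first.
# Choosing a safe delay is guesswork, which is what this PR eliminates.
os.environ["KINETO_USE_DAEMON"] = "1"
os.environ["KINETO_DAEMON_INIT_DELAY_S"] = "3"  # arbitrary illustrative value

import torch  # with this PR, kineto init instead runs after torch module init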
test/profiler/test_kineto.py (new file, 51 lines)
@@ -0,0 +1,51 @@
# Owner(s): ["oncall: profiler"]
import os
import subprocess
import sys
from unittest.mock import patch

import torch
from torch.testing._internal.common_utils import run_tests, TestCase


class SimpleKinetoInitializationTest(TestCase):
    @patch.dict(os.environ, {"KINETO_USE_DAEMON": "1"})
    def test_kineto_profiler_with_environment_variable(self):
        """
        Check that kineto works with torch in daemon mode; see issues #112389
        and #131020. Also check that kineto is not initialized when the user
        loads the shared library directly.
        """
        script = """
import torch
if torch.cuda.is_available():
    torch.cuda.init()
"""
        try:
            subprocess.check_output(
                [sys.executable, "-W", "always", "-c", script],
                cwd=os.path.dirname(os.path.realpath(__file__)),
            )
        except subprocess.CalledProcessError:
            # check_output only raises for a nonzero exit code, so this is
            # an unconditional failure.
            self.fail(
                "Kineto is not working properly with the Dynolog environment variable"
            )
        # Import the shared library directly - this triggers static
        # initialization but does not call kineto_init.
        env = os.environ.copy()
        env["KINETO_USE_DAEMON"] = "1"
        env.pop("KINETO_DAEMON_INIT_DELAY_S", None)
        _, stderr = TestCase.run_process_no_exception(
            f"from ctypes import CDLL; CDLL('{torch._C.__file__}')",
            env=env,  # without this, the env prepared above is never used
        )
        self.assertNotRegex(
            stderr.decode("ascii"),
            "Registering daemon config loader",
            "kineto should not be initialized when the shared library is imported directly",
        )


if __name__ == "__main__":
    run_tests()
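The test above only asserts the negative case against stderr. A complementary manual probe of the positive case (daemon registration happening during a normal `import torch`) could look like the sketch below; it assumes the same "Registering daemon config loader" line is logged on successful daemon-mode init, which the test's regex implies but does not verify.

import os
import subprocess
import sys

env = os.environ.copy()
env["KINETO_USE_DAEMON"] = "1"

# A plain import should now reach torch::global_kineto_init() at the very end
# of C-extension initialization, after libtorch_cuda.so has been loaded.
proc = subprocess.run(
    [sys.executable, "-c", "import torch"],
    env=env,
    capture_output=True,
)
print(b"Registering daemon config loader" in proc.stderr)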
torch/csrc/Module.cpp

@@ -102,6 +102,7 @@
 
 #include <ATen/native/transformers/sdp_utils_cpp.h>
 #include <torch/csrc/profiler/combined_traceback.h>
+#include <torch/csrc/profiler/kineto_client_interface.h>
 #include <sstream>
 
 #ifdef USE_CUDA
@@ -2443,6 +2444,10 @@ Call this whenever a new thread is created in order to propagate values from
   torch::set_disabled_torch_dispatch_impl(
       PyObject_GetAttrString(module, "_disabled_torch_dispatch_impl"));
   ASSERT_TRUE(torch::disabled_torch_dispatch_impl() != nullptr);
+  // init kineto here
+#ifdef USE_KINETO
+  torch::global_kineto_init();
+#endif
   return module;
   END_HANDLE_TH_ERRORS
 }
torch/csrc/profiler/kineto_client_interface.cpp

@@ -2,6 +2,7 @@
 #include <ATen/Context.h>
 #include <libkineto.h>
 #include <torch/csrc/autograd/profiler_kineto.h>
+#include <torch/csrc/profiler/kineto_client_interface.h>
 #include <chrono>
 #include <thread>
 
@@ -71,46 +72,24 @@ class LibKinetoClient : public libkineto::ClientInterface {
 
 } // namespace profiler::impl
 
+void global_kineto_init() {
+#if ENABLE_GLOBAL_OBSERVER
+  if (c10::utils::get_env("KINETO_USE_DAEMON").has_value()) {
+    libkineto_init(
+        /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
+        /*logOnError=*/true);
+    libkineto::api().suppressLogMessages();
+  }
+#endif
+}
+
 #if ENABLE_GLOBAL_OBSERVER
 namespace {
 
-int get_init_delay() {
-  const char* delay_c = std::getenv("KINETO_DAEMON_INIT_DELAY_S");
-  if (!delay_c) {
-    return -1;
-  }
-  std::string delay_s{delay_c};
-  try {
-    return std::stoi(delay_s);
-  } catch (const std::invalid_argument& _) {
-    return -1;
-  }
-}
-
 struct RegisterLibKinetoClient {
   RegisterLibKinetoClient() {
     static profiler::impl::LibKinetoClient client;
     libkineto::api().registerClient(&client);
-
-    auto kineto_init = []() {
-      libkineto_init(
-          /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
-          /*logOnError=*/true);
-      libkineto::api().suppressLogMessages();
-    };
-
-    if (std::getenv("KINETO_USE_DAEMON") != nullptr) {
-      int init_delay_s = get_init_delay();
-      if (init_delay_s > 0) {
-        std::thread t([init_delay_s, kineto_init]() {
-          std::this_thread::sleep_for(std::chrono::seconds(init_delay_s));
-          kineto_init();
-        });
-        t.detach();
-      } else {
-        kineto_init();
-      }
-    }
   }
 } register_libkineto_client;
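For reference, the removed `get_init_delay()` helper parsed the variable on a best-effort basis. A rough Python rendering of its behavior (approximate: `std::stoi` tolerates trailing junk such as "3abc", while `int()` does not):

import os

def get_init_delay() -> int:
    # -1 means "no delay": the removed code then initialized kineto
    # immediately instead of spawning a detached delay thread.
    delay = os.getenv("KINETO_DAEMON_INIT_DELAY_S")
    if delay is None:
        return -1
    try:
        return int(delay)
    except ValueError:
        return -1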
torch/csrc/profiler/kineto_client_interface.h (new file, 11 lines)
@@ -0,0 +1,11 @@
#pragma once

#include <torch/csrc/jit/runtime/interpreter.h>
#include <torch/csrc/profiler/unwind/unwind.h>

namespace torch {

// declare global_kineto_init for libtorch_cpu.so to call
TORCH_API void global_kineto_init(void);

} // namespace torch