init kineto after torch module initialized (#131448)

Fixes #131020

As discussed in the issue thread, we can use `KINETO_DAEMON_INIT_DELAY_S` to delay the initialization of `kineto` in case `kineto` is initialized before `libtorch_cuda.so`.

It's not clear how to set a proper value for the environment variable `KINETO_DAEMON_INIT_DELAY_S`, so here's a trick that makes the initialization of `kineto` happen after the initialization of the `torch` module. I'm not sure whether this is an acceptable trick; please take a look at this PR, thanks.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/131448
Approved by: https://github.com/sraikund16, https://github.com/briancoutinho
This commit is contained in:
augusto.yjh
2024-10-31 13:24:20 +00:00
committed by PyTorch MergeBot
parent ccaa2a206a
commit c934ed6567
4 changed files with 79 additions and 33 deletions

View File

@ -0,0 +1,51 @@
# Owner(s): ["oncall: profiler"]
import os
import subprocess
import sys
from unittest.mock import patch
import torch
from torch.testing._internal.common_utils import run_tests, TestCase
class SimpleKinetoInitializationTest(TestCase):
    @patch.dict(os.environ, {"KINETO_USE_DAEMON": "1"})
    def test_kineto_profiler_with_environment_variable(self):
        """
        This test checks whether kineto works with torch in daemon mode, please refer to issue #112389 and #131020.
        Besides that, this test will also check that kineto will not be initialized when user loads the shared library
        directly.
        """
        script = """
import torch
if torch.cuda.is_available() > 0:
    torch.cuda.init()
"""
        try:
            subprocess.check_output(
                [sys.executable, "-W", "always", "-c", script],
                cwd=os.path.dirname(os.path.realpath(__file__)),
            )
        except subprocess.CalledProcessError:
            # check_output raises CalledProcessError only for a non-zero exit
            # status, so any exception here already means the child failed;
            # the previous `if e.returncode != 0` guard was dead code, and
            # self.fail() is the idiomatic replacement for assertTrue(False).
            self.fail(
                "Kineto is not working properly with the Dynolog environment variable",
            )
        # import the shared library directly - it triggers static init but doesn't call kineto_init
        env = os.environ.copy()
        env["KINETO_USE_DAEMON"] = "1"
        # A configured init delay would defer kineto init past the assertion
        # below, so strip it from the child's environment.
        env.pop("KINETO_DAEMON_INIT_DELAY_S", None)
        # NOTE(review): the original built `env` but never passed it to the
        # subprocess, so the daemon variable never reached the child; forward
        # it -- confirm run_process_no_exception accepts an `env` kwarg.
        _, stderr = TestCase.run_process_no_exception(
            f"from ctypes import CDLL; CDLL('{torch._C.__file__}')", env=env
        )
        self.assertNotRegex(
            stderr.decode("ascii"),
            "Registering daemon config loader",
            "kineto should not be initialized when the shared library is imported directly",
        )
# Standard PyTorch test entry point: run the tests in this file when it is
# executed directly (e.g. `python test_kineto_init.py`).
if __name__ == "__main__":
    run_tests()

View File

@ -102,6 +102,7 @@
#include <ATen/native/transformers/sdp_utils_cpp.h>
#include <torch/csrc/profiler/combined_traceback.h>
#include <torch/csrc/profiler/kineto_client_interface.h>
#include <sstream>
#ifdef USE_CUDA
@ -2443,6 +2444,10 @@ Call this whenever a new thread is created in order to propagate values from
torch::set_disabled_torch_dispatch_impl(
PyObject_GetAttrString(module, "_disabled_torch_dispatch_impl"));
ASSERT_TRUE(torch::disabled_torch_dispatch_impl() != nullptr);
// init kineto here
#ifdef USE_KINETO
torch::global_kineto_init();
#endif
return module;
END_HANDLE_TH_ERRORS
}

View File

@ -2,6 +2,7 @@
#include <ATen/Context.h>
#include <libkineto.h>
#include <torch/csrc/autograd/profiler_kineto.h>
#include <torch/csrc/profiler/kineto_client_interface.h>
#include <chrono>
#include <thread>
@ -71,46 +72,24 @@ class LibKinetoClient : public libkineto::ClientInterface {
} // namespace profiler::impl
// Initialize kineto once the torch python module has finished loading.
// This is a no-op unless the KINETO_USE_DAEMON environment variable is set
// (or when the build disables the global observer).
void global_kineto_init() {
#if ENABLE_GLOBAL_OBSERVER
  if (!c10::utils::get_env("KINETO_USE_DAEMON").has_value()) {
    return;
  }
  // Fall back to CPU-only profiling when no accelerator backend is present.
  const bool cpu_only = !(at::hasCUDA() || at::hasXPU() || at::hasMTIA());
  libkineto_init(/*cpuOnly=*/cpu_only, /*logOnError=*/true);
  libkineto::api().suppressLogMessages();
#endif
}
#if ENABLE_GLOBAL_OBSERVER
namespace {
// Parse the KINETO_DAEMON_INIT_DELAY_S environment variable.
// Returns the configured delay in whole seconds, or -1 when the variable is
// unset or cannot be parsed as an int.
int get_init_delay() {
  const char* delay_c = std::getenv("KINETO_DAEMON_INIT_DELAY_S");
  if (!delay_c) {
    return -1;
  }
  try {
    return std::stoi(std::string{delay_c});
  } catch (const std::exception&) {
    // std::stoi throws std::invalid_argument for non-numeric input and
    // std::out_of_range for values that do not fit in an int; the original
    // caught only the former, so an oversized value would terminate the
    // process during static initialization. Treat both as "no delay".
    return -1;
  }
}
// Registers the LibKinetoClient with libkineto during static initialization
// of the shared library, and -- when KINETO_USE_DAEMON is set -- runs the
// kineto daemon init, optionally deferred by KINETO_DAEMON_INIT_DELAY_S.
struct RegisterLibKinetoClient {
  RegisterLibKinetoClient() {
    // Function-local static: constructed once and lives for the process
    // lifetime, so the raw pointer handed to libkineto stays valid.
    static profiler::impl::LibKinetoClient client;
    libkineto::api().registerClient(&client);
    auto kineto_init = []() {
      libkineto_init(
          // CPU-only profiling when no accelerator backend is present.
          /*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
          /*logOnError=*/true);
      libkineto::api().suppressLogMessages();
    };
    if (std::getenv("KINETO_USE_DAEMON") != nullptr) {
      int init_delay_s = get_init_delay();
      if (init_delay_s > 0) {
        // Deferred init: sleep on a background thread so static init is not
        // blocked. Per the linked issue (#131020), the delay is meant to let
        // dependent shared libraries (e.g. libtorch_cuda.so) finish loading
        // before kineto initializes.
        std::thread t([init_delay_s, kineto_init]() {
          std::this_thread::sleep_for(std::chrono::seconds(init_delay_s));
          kineto_init();
        });
        // NOTE(review): detached thread may still be sleeping at process
        // exit; init is then skipped silently -- presumably acceptable here.
        t.detach();
      } else {
        kineto_init();
      }
    }
  }
} register_libkineto_client;

View File

@ -0,0 +1,11 @@
#pragma once

// Declares the hook that initializes kineto after the torch python module has
// finished loading; defined in kineto_client_interface.cpp and invoked from
// the module-init path in libtorch_python.

// NOTE(review): neither include below appears needed for this lone function
// declaration -- possibly kept for transitive users; confirm before removing.
#include <torch/csrc/jit/runtime/interpreter.h>
#include <torch/csrc/profiler/unwind/unwind.h>

namespace torch {

// declare global_kineto_init for libtorch_cpu.so to call
TORCH_API void global_kineto_init(void);

} // namespace torch