[pytorch] Name threads in thread pools for better debugging (#130270)
Threads inside the thread pools are not named, so they inherit the main process name or the name of the first thread. In our case, if we set `pt_main_thread` as the thread name when a thread does `import torch`, that name is inherited by all the threads in the pools created afterwards. This PR names the threads in the pools I was able to find. Other pools are created as well, such as the OpenMP ones, and we need to follow up on those.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130270
Approved by: https://github.com/d4l3k, https://github.com/albanD
committed by PyTorch MergeBot
parent 312652c325
commit b139b5090f
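The pattern the diff applies is the same in every pool: set the thread name as the first statement of each worker's entry function, because a thread that never sets its own name keeps whatever name it inherited from the thread that spawned it. Below is a minimal standalone sketch of that pattern, not code from the PR; it assumes Linux's pthread_setname_np (which caps names at 15 characters plus the terminator), whereas the helper actually used in the diff is c10::setThreadName from c10/util/thread_name.h.

// Standalone sketch (not from the PR): name a worker thread at startup
// and read the name back to verify. Linux-only; uses GNU extensions.
#include <pthread.h>
#include <cstdio>
#include <thread>

int main() {
  std::thread worker([] {
    // First statement in the thread body, mirroring the diff: without
    // this, the thread keeps the name inherited from its parent.
    pthread_setname_np(pthread_self(), "pt_thread_pool");

    char name[16]; // Linux caps thread names at 15 chars + NUL
    pthread_getname_np(pthread_self(), name, sizeof(name));
    std::printf("worker thread name: %s\n", name);
  });
  worker.join();
  return 0;
}

Naming the thread inside the lambda, rather than from the parent after constructing std::thread, guarantees the name is in place before the worker runs any task and avoids needing the child's handle on the parent side.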
c10/core/thread_pool.cpp
@@ -1,5 +1,6 @@
 #include <c10/core/thread_pool.h>
 #include <c10/util/Logging.h>
+#include <c10/util/thread_name.h>
 #if !defined(__powerpc__) && !defined(__s390x__)
 #include <cpuinfo.h>
 #endif
@@ -41,6 +42,7 @@ ThreadPool::ThreadPool(
       numa_node_id_(numa_node_id) {
   for (std::size_t i = 0; i < threads_.size(); ++i) {
     threads_[i] = std::thread([this, i, init_thread]() {
+      c10::setThreadName("pt_thread_pool");
       if (init_thread) {
         init_thread();
       }
caffe2/utils/threadpool/WorkersPool.h
@@ -232,7 +232,10 @@ class alignas(kGEMMLOWPCacheLineSize) Worker {
       : task_(nullptr),
         state_(State::ThreadStartup),
         counter_to_decrement_when_ready_(counter_to_decrement_when_ready) {
-    thread_ = std::make_unique<std::thread>([this]() { this->ThreadFunc(); });
+    thread_ = std::make_unique<std::thread>([this]() {
+      c10::setThreadName("pt_thread_pool");
+      this->ThreadFunc();
+    });
   }

   ~Worker() {
torch/csrc/distributed/autograd/engine/dist_engine.cpp
@@ -4,6 +4,7 @@
 #include <c10/core/Event.h>
 #include <c10/util/DeadlockDetection.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/autograd/functions/accumulate_grad.h>
 #include <torch/csrc/autograd/input_buffer.h>
 #include <torch/csrc/distributed/autograd/context/container.h>
@@ -76,6 +77,7 @@ class DistAccumulateGradCaptureHook

 void DistEngine::globalCpuThread(
     const std::shared_ptr<ReadyQueue>& ready_queue) {
+  c10::setThreadName("pt_dist_engine");
   while (true) {
     NodeTask task = ready_queue->pop();
     if (task.isShutdownTask_) {
torch/csrc/lazy/core/thread_pool.cpp
@@ -2,6 +2,7 @@

 #include <c10/util/Logging.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/lazy/core/config.h>
 #include <torch/csrc/lazy/core/metrics.h>

@@ -21,7 +22,10 @@ class ThreadPool {
   threads_.reserve(num_threads);
   for (const auto i : c10::irange(num_threads)) {
     (void)i; // Suppress unused variable warning
-    threads_.emplace_back([this]() { Worker(); });
+    threads_.emplace_back([this]() {
+      c10::setThreadName("pt_thread_pool");
+      Worker();
+    });
   }
 }
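With these changes, each pool thread reports its own name to debugging tools such as ps -T, top -H, and gdb's info threads, instead of the inherited pt_main_thread. A quick Linux-only way to check, again a sketch rather than anything from the PR, is to read /proc/self/task/<tid>/comm for every thread in the process:

// Sketch (not from the PR): print the name of every thread in the
// current process by reading /proc/self/task/<tid>/comm (Linux-only).
// Compile with -std=c++17 for std::filesystem.
#include <filesystem>
#include <fstream>
#include <iostream>
#include <string>

int main() {
  for (const auto& task :
       std::filesystem::directory_iterator("/proc/self/task")) {
    std::ifstream comm(task.path() / "comm");
    std::string name;
    std::getline(comm, name);
    std::cout << task.path().filename().string() << " " << name << "\n";
  }
  return 0;
}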