[pytorch] Name threads in thread pools for better debugging (#130270)

Threads inside the thread pools are not named, so they inherit the main process name or the name of the first thread. In our case, if we set `pt_main_thread` as the thread name when a thread does `import torch`, this name will be inherited by all the threads in the created pools.

This PR names the threads in the pools I was able to find. Other pools are also created, such as the OpenMP ones, and we need to follow up on those.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/130270
Approved by: https://github.com/d4l3k, https://github.com/albanD
This commit is contained in:
Valentin Andrei
2024-07-09 08:03:45 +00:00
committed by PyTorch MergeBot
parent 312652c325
commit b139b5090f
4 changed files with 13 additions and 2 deletions

View File

@ -1,5 +1,6 @@
#include <c10/core/thread_pool.h>
#include <c10/util/Logging.h>
#include <c10/util/thread_name.h>
#if !defined(__powerpc__) && !defined(__s390x__)
#include <cpuinfo.h>
#endif
@ -41,6 +42,7 @@ ThreadPool::ThreadPool(
numa_node_id_(numa_node_id) {
for (std::size_t i = 0; i < threads_.size(); ++i) {
threads_[i] = std::thread([this, i, init_thread]() {
c10::setThreadName("pt_thread_pool");
if (init_thread) {
init_thread();
}

View File

@ -232,7 +232,10 @@ class alignas(kGEMMLOWPCacheLineSize) Worker {
: task_(nullptr),
state_(State::ThreadStartup),
counter_to_decrement_when_ready_(counter_to_decrement_when_ready) {
thread_ = std::make_unique<std::thread>([this]() { this->ThreadFunc(); });
thread_ = std::make_unique<std::thread>([this]() {
c10::setThreadName("pt_thread_pool");
this->ThreadFunc();
});
}
~Worker() {

View File

@ -4,6 +4,7 @@
#include <c10/core/Event.h>
#include <c10/util/DeadlockDetection.h>
#include <c10/util/irange.h>
#include <c10/util/thread_name.h>
#include <torch/csrc/autograd/functions/accumulate_grad.h>
#include <torch/csrc/autograd/input_buffer.h>
#include <torch/csrc/distributed/autograd/context/container.h>
@ -76,6 +77,7 @@ class DistAccumulateGradCaptureHook
void DistEngine::globalCpuThread(
const std::shared_ptr<ReadyQueue>& ready_queue) {
c10::setThreadName("pt_dist_engine");
while (true) {
NodeTask task = ready_queue->pop();
if (task.isShutdownTask_) {

View File

@ -2,6 +2,7 @@
#include <c10/util/Logging.h>
#include <c10/util/irange.h>
#include <c10/util/thread_name.h>
#include <torch/csrc/lazy/core/config.h>
#include <torch/csrc/lazy/core/metrics.h>
@ -21,7 +22,10 @@ class ThreadPool {
threads_.reserve(num_threads);
for (const auto i : c10::irange(num_threads)) {
(void)i; // Suppress unused variable warning
threads_.emplace_back([this]() { Worker(); });
threads_.emplace_back([this]() {
c10::setThreadName("pt_thread_pool");
Worker();
});
}
}