[pytorch] Name threads in thread pools for better debugging (#130270)

Threads inside the thread pools are not named, so they inherit the main process name or the name of the first thread. In our case, if we set `pt_main_thread` as the thread name when a thread does `import torch`, this name will be inherited by all the threads in the created pools. This PR names the threads in the pools I was able to find. Other pools are created as well, such as the OpenMP ones, and we need to follow up on those. Pull Request resolved: https://github.com/pytorch/pytorch/pull/130270 Approved by: https://github.com/d4l3k, https://github.com/albanD
2025-10-20 21:14:14 +08:00 · 2024-07-09 08:03:45 +00:00
parent 312652c325
commit b139b5090f
4 changed files with 13 additions and 2 deletions
--- a/c10/core/thread_pool.cpp
+++ b/c10/core/thread_pool.cpp
@ -1,5 +1,6 @@
 #include <c10/core/thread_pool.h>
 #include <c10/util/Logging.h>
+#include <c10/util/thread_name.h>
 #if !defined(__powerpc__) && !defined(__s390x__)
 #include <cpuinfo.h>
 #endif
@ -41,6 +42,7 @@ ThreadPool::ThreadPool(
      numa_node_id_(numa_node_id) {
  for (std::size_t i = 0; i < threads_.size(); ++i) {
    threads_[i] = std::thread([this, i, init_thread]() {
+      c10::setThreadName("pt_thread_pool");
      if (init_thread) {
        init_thread();
      }
--- a/caffe2/utils/threadpool/WorkersPool.h
+++ b/caffe2/utils/threadpool/WorkersPool.h
@ -232,7 +232,10 @@ class alignas(kGEMMLOWPCacheLineSize) Worker {
      : task_(nullptr),
        state_(State::ThreadStartup),
        counter_to_decrement_when_ready_(counter_to_decrement_when_ready) {
-    thread_ = std::make_unique<std::thread>([this]() { this->ThreadFunc(); });
+    thread_ = std::make_unique<std::thread>([this]() {
+      c10::setThreadName("pt_thread_pool");
+      this->ThreadFunc();
+    });
  }

  ~Worker() {
--- a/torch/csrc/distributed/autograd/engine/dist_engine.cpp
+++ b/torch/csrc/distributed/autograd/engine/dist_engine.cpp
@ -4,6 +4,7 @@
 #include <c10/core/Event.h>
 #include <c10/util/DeadlockDetection.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/autograd/functions/accumulate_grad.h>
 #include <torch/csrc/autograd/input_buffer.h>
 #include <torch/csrc/distributed/autograd/context/container.h>
@ -76,6 +77,7 @@ class DistAccumulateGradCaptureHook

 void DistEngine::globalCpuThread(
    const std::shared_ptr<ReadyQueue>& ready_queue) {
+  c10::setThreadName("pt_dist_engine");
  while (true) {
    NodeTask task = ready_queue->pop();
    if (task.isShutdownTask_) {
--- a/torch/csrc/lazy/core/thread_pool.cpp
+++ b/torch/csrc/lazy/core/thread_pool.cpp
@ -2,6 +2,7 @@

 #include <c10/util/Logging.h>
 #include <c10/util/irange.h>
+#include <c10/util/thread_name.h>
 #include <torch/csrc/lazy/core/config.h>
 #include <torch/csrc/lazy/core/metrics.h>

@ -21,7 +22,10 @@ class ThreadPool {
    threads_.reserve(num_threads);
    for (const auto i : c10::irange(num_threads)) {
      (void)i; // Suppress unused variable warning
-      threads_.emplace_back([this]() { Worker(); });
+      threads_.emplace_back([this]() {
+        c10::setThreadName("pt_thread_pool");
+        Worker();
+      });
    }
  }