[c10] Move profiler clock to libc10 for timestamps (#111972)

Summary:
Move the profiler's Approximate Clock from libtorch to libc10. The main reason is to allow c10 features to obtain timestamps without depending on libtorch.

The clock uses TSC when available, for performance. The CUDA Caching Allocator's implementation of memory snapshot will add timestamps to memory events using this same clock in a subsequent diff.

Test Plan: CI

Differential Revision: D50601935

Pulled By: aaronenyeshi

Pull Request resolved: https://github.com/pytorch/pytorch/pull/111972
Approved by: https://github.com/davidberard98
This commit is contained in:
Aaron Enye Shi
2023-10-27 16:18:40 +00:00
committed by PyTorch MergeBot
parent fdbb73fa4e
commit 63c089b09d
16 changed files with 283 additions and 242 deletions

View File

@ -1,5 +1,6 @@
#include <ATen/Utils.h>
#include <c10/core/TensorImpl.h>
#include <c10/util/ApproximateClock.h>
#include <torch/csrc/jit/backends/backend.h>
#include <torch/csrc/jit/backends/backend_exception.h>
@ -112,14 +113,14 @@ class BackendWithCompiler : public PyTorchBackendInterface {
c10::List<at::Tensor> output_list;
#ifndef NO_PROFILING
auto start_us = torch::profiler::impl::getTime() / 1000;
auto start_us = c10::getTime() / 1000;
#endif
for (const auto& token : handle.toList()) {
IValue val = token;
auto instruction = val.toTupleRef().elements()[0].toStringRef();
auto debug_handle = val.toTupleRef().elements()[1].toInt();
#ifndef NO_PROFILING
auto start_time_us = torch::profiler::impl::getTime() / 1000;
auto start_time_us = c10::getTime() / 1000;
#endif
try {
if (instruction.rfind("prim::Constant", 0) == 0) {
@ -171,7 +172,7 @@ class BackendWithCompiler : public PyTorchBackendInterface {
TORCH_DELEGATED_BACKEND_THROW(false, e.what(), debug_handle);
}
#ifndef NO_PROFILING
auto end_time_us = torch::profiler::impl::getTime() / 1000;
auto end_time_us = c10::getTime() / 1000;
auto duration = end_time_us - start_time_us;
op_runtimes_us.emplace_back(duration, debug_handle, instruction);
#endif