[Profiler] Create Auto-Trace Frontend for Trace ID (#139310)

Summary:
This PR adds the Auto-Trace implementation for Trace ID. By default, the Python side generates a UUID in the same format as the one set in the backend by Kineto. When an auto-trace runs, the Python-generated trace ID overwrites the one set in Kineto via the Config variable. Since we don't expect users to generate on-demand traces after an auto-trace, we can simply keep overwriting the backend trace ID whenever an auto-trace is run. If we eventually want to support that flow, we only have to add a backend call in Kineto that generates a new ID when profiling starts.
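For reference, the default ID is a version-4 UUID rendered as 32 uppercase hex characters; a minimal standalone sketch of the format (the function name here is illustrative, not from this PR):

    import uuid

    # Default trace ID format: the 128-bit uuid4 value printed as
    # 32 zero-padded, uppercase hexadecimal characters.
    def make_default_trace_id() -> str:
        return f"{uuid.uuid4().int:032X}"

    print(make_default_trace_id())  # e.g. "7A9C..." (always 32 hex chars)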

We also implement a custom callback in the frontend so that users can generate their own trace IDs if they wish. This works similarly to the default; the only difference is that users have to set the callback manually after a profiler is created. We expose a dedicated setter for this, rather than putting it in the frontend initializer, in case users want to change the trace_id for different repeats.
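A minimal sketch of the setter path (the callback body and workload are illustrative; the setter and getter are the ones added by this PR):

    import uuid

    import torch
    from torch.profiler import profile, ProfilerActivity

    def my_trace_id() -> str:
        # Any zero-argument callable returning a str works here.
        return f"myjob-{uuid.uuid4().hex[:16]}"

    p = profile(activities=[ProfilerActivity.CPU])
    p.set_custom_trace_id_callback(my_trace_id)  # set after construction
    with p:
        torch.mm(torch.rand(8, 8), torch.rand(8, 8))
        print(p.get_trace_id())  # the ID produced by my_trace_id()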

Test Plan: Tested both the default and custom callbacks using the verbose prints that were added. The trace IDs printed on the frontend matched those printed on the backend for the manifold upload.

Differential Revision: D65178308

Pull Request resolved: https://github.com/pytorch/pytorch/pull/139310
Approved by: https://github.com/shengfukevin
Shivam Raikundalia
2024-10-31 19:02:57 +00:00
committed by PyTorch MergeBot
commit ac7acfb894 (parent 7faf0ad913)
9 changed files with 90 additions and 21 deletions


@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Any, Literal
+from typing import Any, Literal, Optional
 from typing_extensions import TypeAlias

 from torch._C import device, dtype, layout
@@ -72,6 +72,7 @@ class ProfilerConfig:
         with_flops: bool,
         with_modules: bool,
         experimental_config: _ExperimentalConfig,
+        trace_id: Optional[str] = None,
     ) -> None: ...

 class _ProfilerEvent:


@@ -1,4 +1,5 @@
 # mypy: allow-untyped-defs
+import uuid
 from collections import defaultdict
 from dataclasses import dataclass
 from time import perf_counter_ns
@@ -209,6 +210,7 @@ class profile:
         use_cpu=True,
         experimental_config=None,
         acc_events=False,
+        custom_trace_id_callback=None,
     ):
         self.enabled: bool = enabled
         if not self.enabled:
@@ -245,7 +247,8 @@
         self.profiling_start_time_ns = 0
         self.profiling_end_time_ns = 0
         self._stats = _ProfilerStats()
+        self.custom_trace_id_callback = custom_trace_id_callback
+        self.trace_id = ""
         if not self.use_cpu:
             assert (
                 use_kineto
@@ -305,7 +308,22 @@
                 len(self.kineto_activities) > 0
             ), "No activities specified for the profiler"

-    def config(self):
+    def default_trace_id(self):
+        # Generate a UUID
+        uuid_raw = uuid.uuid4()
+        return f"{uuid_raw.int:032X}"
+
+    def create_trace_id(self):
+        if self.custom_trace_id_callback:
+            return self.custom_trace_id_callback()
+        return self.default_trace_id()
+
+    def config(self, create_trace_id=False):
+        # only need to generate new trace id upon prepare trace not start trace
+        if create_trace_id:
+            trace_id = self.create_trace_id()
+            self.trace_id = trace_id
         return ProfilerConfig(
             self.profiler_kind,
             self.record_shapes,
@@ -314,6 +332,7 @@
             self.with_flops,
             self.with_modules,
             self.experimental_config,
+            self.trace_id,
         )

     def __enter__(self):
@@ -328,7 +347,7 @@
     def _prepare_trace(self):
         self.entered = True
         t0 = perf_counter_ns()
-        _prepare_profiler(self.config(), self.kineto_activities)
+        _prepare_profiler(self.config(create_trace_id=True), self.kineto_activities)
         t1 = perf_counter_ns()
         self._stats.profiler_prepare_call_duration_us = int((t1 - t0) / 1000)
@@ -336,7 +355,7 @@
         self.entered = True
         _run_on_profiler_start()
         t0 = perf_counter_ns()
-        _enable_profiler(self.config(), self.kineto_activities)
+        _enable_profiler(self.config(create_trace_id=False), self.kineto_activities)
         t1 = perf_counter_ns()
         self._stats.profiler_enable_call_duration_us = int((t1 - t0) / 1000)
         self.profiling_start_time_ns = t1


@@ -603,7 +603,8 @@ void prepareProfiler(
           at::hasCUDA() || at::hasXPU() || at::hasMTIA() ||
               c10::get_privateuse1_backend() != "privateuseone"),
       activities,
-      config.experimental_config);
+      config.experimental_config,
+      config.trace_id);

   if (!config.experimental_config.performance_events.empty()) {
     /* For now only CPU activity is supported */


@@ -220,10 +220,21 @@ bool collectivesProfilerExists() {
 #endif
 }

+static const std::string setTraceID(const std::string& trace_id) {
+  if (trace_id == "") {
+    return "";
+  }
+  std::stringstream configss;
+  configss << "REQUEST_TRACE_ID=" << trace_id << "\n";
+  configss << "REQUEST_GROUP_TRACE_ID=" << trace_id << "\n";
+  return configss.str();
+}
+
 void prepareTrace(
     const bool cpuOnly,
     const ActivitySet& activities,
-    const torch::profiler::impl::ExperimentalConfig& config) {
+    const torch::profiler::impl::ExperimentalConfig& config,
+    const std::string& trace_id) {
 #ifdef USE_KINETO
   libkineto::api().resetKinetoTLS();
   if (!libkineto::api().isProfilerRegistered()) {
@@ -270,7 +281,9 @@
     return;
   }

-  libkineto::api().activityProfiler().prepareTrace(k_activities);
+  const std::string configStr = setTraceID(trace_id);
+  libkineto::api().activityProfiler().prepareTrace(k_activities, configStr);

 #endif // USE_KINETO
 }
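For reference, with a non-empty trace ID such as ABC123 (an illustrative value), setTraceID above hands Kineto the freeform config string:

    REQUEST_TRACE_ID=ABC123
    REQUEST_GROUP_TRACE_ID=ABC123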


@@ -111,7 +111,8 @@ using ActivitySet = std::set<torch::autograd::profiler::ActivityType>;
 void prepareTrace(
     const bool cpuOnly,
     const ActivitySet& activities,
-    const torch::profiler::impl::ExperimentalConfig& config);
+    const torch::profiler::impl::ExperimentalConfig& config,
+    const std::string& trace_id = "");

 void toggleCollectionDynamic(const bool enable);
 void startTrace();


@@ -38,14 +38,16 @@ ProfilerConfig::ProfilerConfig(
     bool with_stack,
     bool with_flops,
     bool with_modules,
-    ExperimentalConfig experimental_config)
+    ExperimentalConfig experimental_config,
+    const std::string& trace_id)
     : state{state},
       experimental_config{std::move(experimental_config)},
      report_input_shapes{report_input_shapes},
       profile_memory{profile_memory},
       with_stack{with_stack},
       with_flops{with_flops},
-      with_modules{with_modules} {}
+      with_modules{with_modules},
+      trace_id{trace_id} {}

 bool ProfilerConfig::disabled() const {
   return state == torch::profiler::impl::ProfilerState::Disabled;


@@ -103,7 +103,8 @@ struct TORCH_API ProfilerConfig {
       bool with_stack = false,
       bool with_flops = false,
       bool with_modules = false,
-      ExperimentalConfig experimental_config = ExperimentalConfig());
+      ExperimentalConfig experimental_config = ExperimentalConfig(),
+      const std::string& trace_id = "");

   bool disabled() const;
   bool global() const;
@@ -115,6 +116,7 @@ struct TORCH_API ProfilerConfig {
   bool with_stack;
   bool with_flops;
   bool with_modules;
+  std::string trace_id;

   // For serialization
   at::IValue toIValue() const;


@@ -410,15 +410,26 @@ void initPythonBindings(PyObject* module) {
       }));

   py::class_<ProfilerConfig>(m, "ProfilerConfig")
-      .def(py::init<
-          ProfilerState,
-          bool, /* report_input_shapes */
-          bool, /* profile_memory */
-          bool, /* with_stack */
-          bool, /* with_flops */
-          bool, /* with_modules */
-          ExperimentalConfig /* experimental_config */
-          >());
+      .def(
+          py::init<
+              ProfilerState,
+              bool, /* report_input_shapes */
+              bool, /* profile_memory */
+              bool, /* with_stack */
+              bool, /* with_flops */
+              bool, /* with_modules */
+              ExperimentalConfig /* experimental_config */,
+              std::string /* trace_id */
+              >(),
+          py::arg("state"),
+          py::arg("report_input_shapes"),
+          py::arg("profile_memory"),
+          py::arg("with_stack"),
+          py::arg("with_flops"),
+          py::arg("with_modules"),
+          py::arg("experimental_config"),
+          py::arg("trace_id") = "" // Make trace_id the only optional param
+      );

   py::enum_<EventType>(m, "_EventType")
       .value("TorchOp", EventType::TorchOp)


@@ -141,6 +141,7 @@ class _KinetoProfile:
         experimental_config: Optional[_ExperimentalConfig] = None,
         execution_trace_observer: Optional[_ITraceObserver] = None,
         acc_events: bool = False,
+        custom_trace_id_callback: Optional[Callable[[], str]] = None,
     ):
         self.activities = set(activities) if activities else supported_activities()
         self.record_shapes = record_shapes
@@ -151,6 +152,7 @@
         self.experimental_config = experimental_config
         self.execution_trace_observer = execution_trace_observer
         self.acc_events = acc_events
+        self.custom_trace_id_callback = custom_trace_id_callback
         self.profiler: Optional[prof.profile] = None
         self.mem_tl: Optional[MemoryProfileTimeline] = None
         self.use_device = None
@@ -186,6 +188,7 @@
             use_kineto=True,
             experimental_config=self.experimental_config,
             acc_events=self.acc_events,
+            custom_trace_id_callback=self.custom_trace_id_callback,
         )
         self.profiler._prepare_trace()
@@ -661,6 +664,7 @@ class profile(_KinetoProfile):
         acc_events: bool = False,
         # deprecated:
         use_cuda: Optional[bool] = None,
+        custom_trace_id_callback: Optional[Callable[[], str]] = None,
     ):
         activities_set = set(activities) if activities else supported_activities()
         if use_cuda is not None:
@@ -685,6 +689,7 @@
             experimental_config=experimental_config,
             execution_trace_observer=execution_trace_observer,
             acc_events=acc_events,
+            custom_trace_id_callback=custom_trace_id_callback,
         )

         if schedule:
@@ -806,6 +811,20 @@
             )
             self.step_rec_fn.__enter__()

+    def set_custom_trace_id_callback(self, callback):
+        """
+        Sets a callback to be called when a new trace ID is generated.
+        """
+        self.custom_trace_id_callback = callback
+
+    def get_trace_id(self):
+        """
+        Returns the current trace ID.
+        """
+        if self.profiler is None:
+            return None
+        return self.profiler.trace_id
+
     def _trace_ready(self):
         if self.on_trace_ready:
             self.on_trace_ready(self)
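End to end, the default path (no callback set) can be exercised as below; a fresh UUID-format ID is minted when the trace is prepared (the workload is illustrative):

    import torch
    from torch.profiler import profile, ProfilerActivity

    with profile(activities=[ProfilerActivity.CPU]) as prof:
        torch.mm(torch.rand(8, 8), torch.rand(8, 8))
        print(prof.get_trace_id())  # default ID: 32 uppercase hex chars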