mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
[dynamo][guards] Profile guard manager in C++ (#140110)
This should remove the pybind noise from the profiling. Pull Request resolved: https://github.com/pytorch/pytorch/pull/140110 Approved by: https://github.com/jansel ghstack dependencies: #139953
This commit is contained in:
committed by
PyTorch MergeBot
parent
a140e65e0f
commit
e6c5a77485
@ -1,5 +1,5 @@
|
||||
# mypy: allow-untyped-defs
|
||||
from typing import Any
|
||||
from typing import Any, Dict
|
||||
|
||||
import torch
|
||||
|
||||
@ -104,6 +104,10 @@ def install_no_tensor_aliasing_guard(
|
||||
tensor_names: list[str],
|
||||
verbose_code_parts: list[str],
|
||||
): ...
|
||||
# Repeatedly evaluates the guard manager against `f_locals` from C++ and
# returns the average time of a single check, in microseconds.
# NOTE(review): the C++ binding is declared as taking a `RootGuardManager*`;
# confirm whether this annotation should be narrowed to `RootGuardManager`.
def profile_guard_manager(
    guard_manager: GuardManager,
    f_locals: dict[str, Any],
) -> float: ...
|
||||
|
||||
class TensorGuards:
|
||||
def __init__(
|
||||
|
@ -16,7 +16,6 @@ import math
|
||||
import re
|
||||
import sys
|
||||
import textwrap
|
||||
import time
|
||||
import types
|
||||
import warnings
|
||||
import weakref
|
||||
@ -47,6 +46,7 @@ from torch._C._dynamo.guards import (
|
||||
DictGuardManager,
|
||||
install_no_tensor_aliasing_guard,
|
||||
install_object_aliasing_guard,
|
||||
profile_guard_manager,
|
||||
RootGuardManager,
|
||||
)
|
||||
from torch._dynamo.source import (
|
||||
@ -2219,7 +2219,10 @@ class CheckFunctionManager:
|
||||
raise AssertionError(f"Guard check failed: {reasons}")
|
||||
|
||||
if guards_log.isEnabledFor(logging.DEBUG):
|
||||
self.profile_guard_eval(output_graph.local_scope)
|
||||
latency = profile_guard_manager(
|
||||
self.guard_manager.root, output_graph.local_scope
|
||||
)
|
||||
guards_log.debug("Guard eval latency = %s us", f"{latency:.2f}")
|
||||
|
||||
# NB - We have to be very careful when cleaning up here. Because of the
|
||||
# invalidate function, we can create a weakref finalizer that keeps
|
||||
@ -2232,18 +2235,6 @@ class CheckFunctionManager:
|
||||
self._weakrefs.clear()
|
||||
self.output_graph = None
|
||||
|
||||
def profile_guard_eval(self, f_locals):
    """Log the average latency of one guard check, in microseconds.

    Calls ``self.guard_manager.check(f_locals)`` in a loop for roughly
    ``profile_duration`` seconds, then reports the mean time per call on
    ``guards_log`` at DEBUG level.

    Args:
        f_locals: forwarded unchanged to ``guard_manager.check`` on every
            iteration.
    """
    profile_duration = 1  # unit is seconds
    iterations = 0
    elapsed = 0.0
    # perf_counter() is monotonic; time.time() follows the wall clock and can
    # jump backwards or forwards while we are measuring.
    start_time = time.perf_counter()

    while elapsed < profile_duration:
        self.guard_manager.check(f_locals)
        iterations += 1
        elapsed = time.perf_counter() - start_time

    # Divide the time actually spent rather than assuming exactly one second:
    # the final iteration always overshoots the deadline, and this keeps the
    # result correct if profile_duration is ever changed.
    guard_latency = elapsed * 10**6 / iterations  # us
    guards_log.debug("Guard eval latency = %s us", f"{guard_latency:.2f}")
|
||||
|
||||
def compile_check_fn(self, builder, guards_out, guard_fail_fn):
|
||||
# see parallel handling of ".0" / "___implicit0" in _eval_frame.c
|
||||
largs = builder.argnames
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <ATen/xpu/EmptyTensor.h>
|
||||
#endif
|
||||
|
||||
#include <chrono>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
#include <utility>
|
||||
@ -1627,6 +1628,7 @@ class GuardAccessor {
|
||||
* entries.
|
||||
*/
|
||||
|
||||
// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
|
||||
class GuardManager {
|
||||
public:
|
||||
GuardManager() = delete;
|
||||
@ -3681,6 +3683,38 @@ void install_no_tensor_aliasing_guard(
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Measures the average latency of a single guard evaluation.
 *
 * Calls root->check_nopybind(locals) directly in a loop for roughly
 * kProfileDurationSec seconds (after a short warmup) and returns the mean
 * time per call.
 *
 * @param root      root guard manager whose check_nopybind is timed.
 * @param f_locals  Python object handed to check_nopybind on every call.
 * @return average time per check_nopybind call, in microseconds.
 */
double profile_guard_manager(RootGuardManager* root, py::object f_locals) {
  PyObject* locals = f_locals.ptr();

  // Warmup: a few untimed calls before measurement starts.
  constexpr int kWarmupIters = 10;
  for (int i = 0; i < kWarmupIters; i++) {
    root->check_nopybind(locals);
  }

  // Single source of truth for how long to profile; the loop condition and
  // the final average both derive from the measured time, so changing this
  // constant cannot skew the result.
  constexpr double kProfileDurationSec = 1.0;

  // steady_clock is monotonic; high_resolution_clock may alias system_clock
  // and is then subject to wall-clock adjustments.
  using Clock = std::chrono::steady_clock;

  int count = 0;
  std::chrono::duration<double> elapsed{0.0};
  const auto start = Clock::now();

  // Run the loop for kProfileDurationSec seconds (always at least once, so
  // count is never zero in the division below).
  do {
    root->check_nopybind(locals);
    count++;
    elapsed = Clock::now() - start;
  } while (elapsed.count() < kProfileDurationSec);

  // Average time per iteration: seconds -> microseconds, divided by calls.
  return (elapsed.count() * 1e6) / count;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
static void* _torchinductor_pyobject_tensor_data_ptr(PyObject* obj) {
|
||||
@ -4506,6 +4540,7 @@ PyObject* torch_c_dynamo_guards_init() {
|
||||
py_m.def("install_object_aliasing_guard", install_object_aliasing_guard);
|
||||
py_m.def(
|
||||
"install_no_tensor_aliasing_guard", install_no_tensor_aliasing_guard);
|
||||
py_m.def("profile_guard_manager", profile_guard_manager);
|
||||
|
||||
// initialize dict_version_map watcher for 3.12
|
||||
#if IS_PYTHON_3_12_PLUS
|
||||
|
Reference in New Issue
Block a user