mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[Profiler][PrivateUse1] Profiler support PrivateUse1 key (#120556)
Summary: 1. Package the public headers of kineto if USE_KINETO is set, so that they can be used by PrivateUse1 users. 2. Add the PrivateUse1 key to ActivityType. 3. Support the PrivateUse1 key in the functions deviceTypeFromActivity and _supported_activities. 4. Fix some bugs when processing profiler results. Co-authored-by: albanD <desmaison.alban@gmail.com> Co-authored-by: Aaron Shi <enye.shi@gmail.com> Pull Request resolved: https://github.com/pytorch/pytorch/pull/120556 Approved by: https://github.com/aaronenyeshi
This commit is contained in:
committed by
PyTorch MergeBot
parent
6f4c7eeb08
commit
41613a0803
6
setup.py
6
setup.py
@ -1386,6 +1386,12 @@ def main():
|
||||
"include/tensorpipe/transport/uv/*.h",
|
||||
]
|
||||
)
|
||||
if get_cmake_cache_vars()["USE_KINETO"]:
|
||||
torch_package_data.extend(
|
||||
[
|
||||
"include/kineto/*.h",
|
||||
]
|
||||
)
|
||||
torchgen_package_data = [
|
||||
# Recursive glob doesn't work in setup.py,
|
||||
# https://github.com/pypa/setuptools/issues/1806
|
||||
|
@ -270,7 +270,6 @@ class profile:
|
||||
self.profiler_kind = ProfilerState.KINETO_PRIVATEUSE1_FALLBACK
|
||||
else:
|
||||
self.kineto_activities.add(ProfilerActivity.PrivateUse1)
|
||||
self.profiler_kind = ProfilerState.KINETO_PRIVATEUSE1
|
||||
|
||||
assert (
|
||||
len(self.kineto_activities) > 0
|
||||
@ -317,6 +316,10 @@ class profile:
|
||||
return
|
||||
if self.use_cuda:
|
||||
torch.cuda.synchronize()
|
||||
elif self.use_device and hasattr(torch, self.use_device):
|
||||
privateuse1_module = getattr(torch, self.use_device)
|
||||
if hasattr(privateuse1_module, "synchronize"):
|
||||
privateuse1_module.synchronize()
|
||||
|
||||
t0 = perf_counter_ns()
|
||||
self.kineto_results = _disable_profiler()
|
||||
@ -542,7 +545,10 @@ class profile:
|
||||
and fe.id in device_corr_map
|
||||
):
|
||||
for f_evt in device_corr_map[fe.id]:
|
||||
if f_evt.device_type == DeviceType.CUDA:
|
||||
if (
|
||||
f_evt.device_type == DeviceType.CUDA
|
||||
or f_evt.device_type == DeviceType.PrivateUse1
|
||||
):
|
||||
fe.append_kernel(
|
||||
f_evt.name,
|
||||
f_evt.device_index,
|
||||
|
@ -598,7 +598,7 @@ class FunctionEvent(FormattedTimesMixin):
|
||||
[child.privateuse1_time_total for child in self.cpu_children]
|
||||
)
|
||||
else:
|
||||
assert self.device_type == DeviceType.CUDA
|
||||
assert self.device_type == DeviceType.PrivateUse1
|
||||
return self.privateuse1_time_total
|
||||
|
||||
@property
|
||||
@ -867,7 +867,10 @@ def _build_table(
|
||||
event.self_privateuse1_memory_usage > 0 for event in events
|
||||
)
|
||||
use_device = events[0].use_device
|
||||
if not use_device and (has_privateuse1_mem or has_privateuse1_time):
|
||||
# Profiling on a PrivateUse1 device without enabling
|
||||
# ProfilerActivity.PrivateUse1 can still record privateuse1 memory usage,
|
||||
# so when use_device is unset only has_privateuse1_time needs to be checked.
|
||||
if not use_device and has_privateuse1_time:
|
||||
raise RuntimeError(
|
||||
"use_device is None, but there is private device performance data."
|
||||
)
|
||||
@ -951,7 +954,7 @@ def _build_table(
|
||||
"Self CUDA Mem",
|
||||
]
|
||||
)
|
||||
if has_privateuse1_mem:
|
||||
if use_device and has_privateuse1_mem:
|
||||
privateuse1 = use_device.upper()
|
||||
headers.extend(
|
||||
[
|
||||
@ -1132,7 +1135,7 @@ def _build_table(
|
||||
_format_memory(evt.self_cuda_memory_usage),
|
||||
]
|
||||
)
|
||||
if has_privateuse1_mem:
|
||||
if use_device and has_privateuse1_mem:
|
||||
row_values.extend(
|
||||
[
|
||||
# PrivateUse1 Mem Total
|
||||
|
@ -332,6 +332,9 @@ PyObject* THPAutograd_initExtension(PyObject* _unused, PyObject* unused) {
|
||||
if (at::hasMTIA()) {
|
||||
activities.insert(torch::profiler::impl::ActivityType::MTIA);
|
||||
}
|
||||
if (c10::get_privateuse1_backend() != "privateuseone") {
|
||||
activities.insert(torch::profiler::impl::ActivityType::PrivateUse1);
|
||||
}
|
||||
#endif
|
||||
return activities;
|
||||
});
|
||||
|
@ -555,7 +555,9 @@ void prepareProfiler(
|
||||
config.state == ProfilerState::KINETO_PRIVATEUSE1_FALLBACK,
|
||||
"Supported only in Kineto profiler");
|
||||
torch::profiler::impl::kineto::prepareTrace(
|
||||
/*cpuOnly=*/!(at::hasCUDA() || at::hasXPU() || at::hasMTIA()),
|
||||
/*cpuOnly=*/!(
|
||||
at::hasCUDA() || at::hasXPU() || at::hasMTIA() ||
|
||||
c10::get_privateuse1_backend() != "privateuseone"),
|
||||
activities,
|
||||
config.experimental_config);
|
||||
|
||||
|
@ -25,6 +25,8 @@ const std::set<libkineto::ActivityType> kCpuTypes{
|
||||
libkineto::ActivityType::CUDA_RUNTIME,
|
||||
libkineto::ActivityType::CUDA_DRIVER,
|
||||
libkineto::ActivityType::PYTHON_FUNCTION,
|
||||
libkineto::ActivityType::PRIVATEUSE1_RUNTIME,
|
||||
libkineto::ActivityType::PRIVATEUSE1_DRIVER,
|
||||
};
|
||||
|
||||
const std::set<libkineto::ActivityType> kCudaTypes = {
|
||||
@ -47,6 +49,15 @@ const std::set<libkineto::ActivityType> kMtiaTypes = {
|
||||
libkineto::ActivityType::MTIA_CCP_EVENTS,
|
||||
libkineto::ActivityType::MTIA_RUNTIME,
|
||||
};
|
||||
// Kineto activity types requested when the PrivateUse1 profiler activity is selected.
const std::set<libkineto::ActivityType> kPrivateUse1Types = {
|
||||
libkineto::ActivityType::GPU_MEMCPY,
|
||||
libkineto::ActivityType::GPU_MEMSET,
|
||||
libkineto::ActivityType::GPU_USER_ANNOTATION,
|
||||
libkineto::ActivityType::CONCURRENT_KERNEL,
|
||||
// PRIVATEUSE1_RUNTIME and PRIVATEUSE1_DRIVER appear in both kCpuTypes and kPrivateUse1Types.
|
||||
libkineto::ActivityType::PRIVATEUSE1_RUNTIME,
|
||||
libkineto::ActivityType::PRIVATEUSE1_DRIVER,
|
||||
};
|
||||
} // namespace
|
||||
#endif // USE_KINETO
|
||||
|
||||
@ -248,6 +259,9 @@ void prepareTrace(
|
||||
if (collectivesProfilerExists()) {
|
||||
k_activities.insert(libkineto::ActivityType::COLLECTIVE_COMM);
|
||||
}
|
||||
if (activities.count(torch::autograd::profiler::ActivityType::PrivateUse1)) {
|
||||
k_activities.insert(kPrivateUse1Types.begin(), kPrivateUse1Types.end());
|
||||
}
|
||||
|
||||
ExperimentalConfigWrapper configWrap(config);
|
||||
|
||||
@ -336,8 +350,18 @@ c10::DeviceType deviceTypeFromActivity(libkineto::ActivityType activity_type) {
|
||||
case libkineto::ActivityType::GPU_USER_ANNOTATION:
|
||||
case libkineto::ActivityType::CUDA_PROFILER_RANGE:
|
||||
// TODO: T151322015
|
||||
case libkineto::ActivityType::MTIA_CCP_EVENTS:
|
||||
return c10::DeviceType::CUDA;
|
||||
case libkineto::ActivityType::MTIA_CCP_EVENTS: {
|
||||
// The PrivateUse1 kineto backend reuses the ActivityTypes above.
|
||||
// If a PrivateUse1 backend is enabled, this should return
|
||||
// c10::DeviceType::PrivateUse1.
|
||||
c10::DeviceType device_type = []() {
|
||||
if (c10::get_privateuse1_backend() != "privateuseone") {
|
||||
return c10::DeviceType::PrivateUse1;
|
||||
}
|
||||
return c10::DeviceType::CUDA;
|
||||
}();
|
||||
return device_type;
|
||||
}
|
||||
case libkineto::ActivityType::CPU_OP:
|
||||
case libkineto::ActivityType::USER_ANNOTATION:
|
||||
case libkineto::ActivityType::EXTERNAL_CORRELATION:
|
||||
@ -347,6 +371,8 @@ c10::DeviceType deviceTypeFromActivity(libkineto::ActivityType activity_type) {
|
||||
case libkineto::ActivityType::MTIA_RUNTIME:
|
||||
case libkineto::ActivityType::PYTHON_FUNCTION:
|
||||
case libkineto::ActivityType::CUDA_DRIVER:
|
||||
case libkineto::ActivityType::PRIVATEUSE1_RUNTIME:
|
||||
case libkineto::ActivityType::PRIVATEUSE1_DRIVER:
|
||||
return c10::DeviceType::CPU;
|
||||
default: {
|
||||
TORCH_WARN(
|
||||
|
@ -17,6 +17,7 @@ enum class C10_API_ENUM ActivityType {
|
||||
XPU, // XPU kernels, runtime
|
||||
CUDA, // CUDA kernels, runtime
|
||||
MTIA, // MTIA kernels, runtime
|
||||
PrivateUse1, // PrivateUse1 kernels, runtime
|
||||
NUM_KINETO_ACTIVITIES, // must be the last one
|
||||
};
|
||||
|
||||
|
@ -325,7 +325,8 @@ void initPythonBindings(PyObject* module) {
|
||||
.value("CPU", ActivityType::CPU)
|
||||
.value("XPU", ActivityType::XPU)
|
||||
.value("MTIA", ActivityType::MTIA)
|
||||
.value("CUDA", ActivityType::CUDA);
|
||||
.value("CUDA", ActivityType::CUDA)
|
||||
.value("PrivateUse1", ActivityType::PrivateUse1);
|
||||
|
||||
py::class_<ExperimentalConfig>(m, "_ExperimentalConfig")
|
||||
.def(
|
||||
|
@ -143,7 +143,9 @@ class _KinetoProfile:
|
||||
use_cuda=(ProfilerActivity.CUDA in self.activities),
|
||||
use_cpu=(ProfilerActivity.CPU in self.activities),
|
||||
use_mtia=(ProfilerActivity.MTIA in self.activities),
|
||||
use_device=None,
|
||||
use_device=self.use_device
|
||||
if (ProfilerActivity.PrivateUse1 in self.activities)
|
||||
else None,
|
||||
record_shapes=self.record_shapes,
|
||||
with_flops=self.with_flops,
|
||||
profile_memory=self.profile_memory,
|
||||
|
Reference in New Issue
Block a user