Revert "Enable XPUEvent elapsed_time function (#134666)" (#140872)

# Motivation
The reverted PR (#134666) raises an internal UT failure on XPU.
This reverts commit 4bbd6da33101a8d709f1d2921ad8ae6f9b0dc166.
# Additional Context
Refer to https://github.com/pytorch/pytorch/issues/140814 for details.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/140872
Approved by: https://github.com/EikanWang
Author: Yu, Guangye
Date: 2024-11-16 17:20:28 +00:00
Committed by: PyTorch MergeBot
Commit: 62d2c5b667 (parent: 3d26c08dda)
3 changed files with 21 additions and 85 deletions

--- a/aten/src/ATen/xpu/XPUEvent.h
+++ b/aten/src/ATen/xpu/XPUEvent.h

@@ -85,7 +85,8 @@ struct TORCH_XPU_API XPUEvent {
   void record(const XPUStream& stream) {
     if (!isCreated()) {
       device_index_ = stream.device_index();
-      assignEvent(stream.queue());
+      event_ = std::make_unique<sycl::event>(
+          stream.queue().ext_oneapi_submit_barrier());
       const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
       if (C10_UNLIKELY(interp)) {
         (*interp)->trace_gpu_event_creation(
@@ -99,7 +100,9 @@ struct TORCH_XPU_API XPUEvent {
           " does not match recording stream's device ",
           stream.device_index(),
           ".");
-      reassignEvent(stream.queue());
+      event_.reset();
+      event_ = std::make_unique<sycl::event>(
+          stream.queue().ext_oneapi_submit_barrier());
     }
     const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace();
     if (C10_UNLIKELY(interp)) {
@@ -125,7 +128,7 @@ struct TORCH_XPU_API XPUEvent {
     }
   }

-  double elapsed_time(const XPUEvent& other) const {
+  float elapsed_time(const XPUEvent& other) const {
     TORCH_CHECK(
         isCreated() && other.isCreated(),
         "Both events must be recorded before calculating elapsed time.");
@@ -135,20 +138,10 @@ struct TORCH_XPU_API XPUEvent {
     TORCH_CHECK(
         enable_timing_ && other.enable_timing_,
         "Both events must be created with argument 'enable_timing=True'.");
-#if SYCL_COMPILER_VERSION < 20250000
-    // TODO: provides the ability to time the execution of commands in a SYCL
-    // queue without enabling profiling on the entire queue
-    TORCH_CHECK_NOT_IMPLEMENTED(
-        false,
-        "elapsed_time of XPUEvent requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.");
-#endif
-    using namespace sycl::info::event_profiling;
-    // Block until both of the recorded events are completed.
-    uint64_t end_time_ns = other.event().get_profiling_info<command_end>();
-    uint64_t start_time_ns = event().get_profiling_info<command_end>();
-    // Return the elapsed time in milliseconds.
-    return 1e-6 *
-        (static_cast<double>(end_time_ns) - static_cast<double>(start_time_ns));
+    // TODO: provides the ability to time the execution of commands in a SYCL
+    // queue without enabling profiling on the entire queue
+    TORCH_CHECK_NOT_IMPLEMENTED(
+        false, "elapsed_time is not supported by XPUEvent.");
   }

   void synchronize() const {
@@ -163,24 +156,6 @@ struct TORCH_XPU_API XPUEvent {
   }

 private:
-  void assignEvent(sycl::queue& queue) {
-#if SYCL_COMPILER_VERSION >= 20250000
-    if (enable_timing_) {
-      event_ = std::make_unique<sycl::event>(
-          sycl::ext::oneapi::experimental::submit_profiling_tag(queue));
-    } else {
-      event_ = std::make_unique<sycl::event>(queue.ext_oneapi_submit_barrier());
-    }
-#else
-    event_ = std::make_unique<sycl::event>(queue.ext_oneapi_submit_barrier());
-#endif
-  }
-
-  void reassignEvent(sycl::queue& queue) {
-    event_.reset();
-    assignEvent(queue);
-  }
-
   bool enable_timing_ = false;
   DeviceIndex device_index_ = -1;
   // Only need to track the last event, as events in an in-order queue are
--- a/c10/xpu/impl/XPUGuardImpl.h
+++ b/c10/xpu/impl/XPUGuardImpl.h

@@ -140,30 +140,6 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
         event_command_status::complete;
   }

-  double elapsedTime(
-      void* start_event,
-      void* end_event,
-      const DeviceIndex device_index) const override {
-#if SYCL_COMPILER_VERSION < 20250000
-    TORCH_CHECK_NOT_IMPLEMENTED(
-        false,
-        "elapsedTime requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.");
-#endif
-    TORCH_CHECK(
-        start_event && end_event,
-        "Both events must be recorded before calculating elapsed time.");
-    auto* xpu_start_event = reinterpret_cast<sycl::event*>(start_event);
-    auto* xpu_end_event = reinterpret_cast<sycl::event*>(end_event);
-    using namespace sycl::info::event_profiling;
-    // Block until both of the recorded events are completed.
-    uint64_t end_time_ns = xpu_end_event->get_profiling_info<command_end>();
-    uint64_t start_time_ns = xpu_start_event->get_profiling_info<command_end>();
-    // Return the elapsed time in milliseconds.
-    return 1e-6 *
-        (static_cast<double>(end_time_ns) - static_cast<double>(start_time_ns));
-  }
-
   // Stream-related functions
   bool queryStream(const Stream& stream) const override {
     const XPUStream xpu_stream{stream};
@@ -200,6 +176,12 @@ struct XPUGuardImpl final : public c10::impl::DeviceGuardImplInterface {
     const XPUStream xpu_stream{stream};
     XPUCachingAllocator::recordStream(data_ptr, xpu_stream);
   }
+
+  double elapsedTime(void* event1, void* event2, const DeviceIndex device_index)
+      const override {
+    TORCH_CHECK_NOT_IMPLEMENTED(
+        false, "elapsedTime is not supported by XPU backend.");
+  }
 };

 } // namespace c10::xpu::impl
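As with XPUEvent above, XPUGuardImpl::elapsedTime moves back to an unconditional TORCH_CHECK_NOT_IMPLEMENTED, and the test update below asserts exactly this through the generic torch.Event API. A short sketch of that path (illustrative only; assumes an XPU-enabled build):

# Sketch: the generic Event path now raises, as the updated
# test_generic_stream_event below asserts.
import torch

stream = torch.Stream("xpu")
event1 = torch.Event("xpu")
event2 = torch.Event("xpu")
event1.record(stream)
event2.record(stream)
event2.synchronize()

try:
    event1.elapsed_time(event2)
except NotImplementedError as e:
    print(e)  # "elapsedTime is not supported by XPU backend."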

--- a/test/test_xpu.py
+++ b/test/test_xpu.py

@@ -3,7 +3,6 @@
 import subprocess
 import sys
 import tempfile
-import time
 import unittest

 import torch
@@ -236,21 +235,6 @@ print(torch.xpu.device_count())
         stream.record_event(event)
         event.synchronize()
         self.assertTrue(event.query())
-        start_event = torch.xpu.Event(enable_timing=True)
-        end_event = torch.xpu.Event(enable_timing=True)
-        stream.record_event(start_event)
-        time.sleep(0.1)
-        stream.record_event(end_event)
-        torch.xpu.synchronize()
-        if int(torch.version.xpu) >= 20250000:
-            self.assertGreater(start_event.elapsed_time(end_event), 0)
-            self.assertLess(end_event.elapsed_time(start_event), 0)
-        else:
-            with self.assertRaisesRegex(
-                NotImplementedError,
-                "elapsed_time of XPUEvent requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.",
-            ):
-                start_event.elapsed_time(end_event)

     def test_generic_stream_event(self):
         stream = torch.Stream("xpu")
@@ -266,8 +250,8 @@ print(torch.xpu.device_count())
         self.assertEqual(stream.stream_id, xpu_stream.stream_id)
         self.assertNotEqual(stream.stream_id, torch.xpu.current_stream().stream_id)

-        event1 = torch.Event("xpu", enable_timing=True)
-        event2 = torch.Event("xpu", enable_timing=True)
+        event1 = torch.Event("xpu")
+        event2 = torch.Event("xpu")
         self.assertEqual(event1.event_id, 0)
         a = torch.randn(1000)
         b = torch.randn(1000)
@@ -284,15 +268,10 @@ print(torch.xpu.device_count())
         self.assertTrue(event2.query())
         self.assertNotEqual(event1.event_id, event2.event_id)
         self.assertEqual(c_xpu.cpu(), a + b)
-        if int(torch.version.xpu) >= 20250000:
-            self.assertGreater(event1.elapsed_time(event2), 0)
-            self.assertLess(event2.elapsed_time(event1), 0)
-        else:
-            with self.assertRaisesRegex(
-                NotImplementedError,
-                "elapsedTime requires PyTorch to be built with SYCL compiler version 2025.0.0 or newer.",
-            ):
-                event1.elapsed_time(event2)
+        with self.assertRaisesRegex(
+            NotImplementedError, "elapsedTime is not supported by XPU backend."
+        ):
+            event1.elapsed_time(event2)
         xpu_event = torch.xpu.Event()
         self.assertIsInstance(xpu_event, torch.Event)
         self.assertTrue(issubclass(type(xpu_event), torch.Event))