export XPUStream to doc (#121398)

# Motivation
We would like to export XPUStream to the public [doc](https://pytorch.org/cppdocs/api/library_root.html). Detailed documentation helps users understand and use XPU more effectively.

# Additional Context
A detailed description of the XPUStream API and its usage should also be added to the public docs, like CUDA's [doc](https://github.com/pytorch/pytorch/blob/main/docs/cpp/source/notes/tensor_cuda_stream.rst).

Pull Request resolved: https://github.com/pytorch/pytorch/pull/121398
Approved by: https://github.com/gujinghui, https://github.com/jgong5, https://github.com/EikanWang, https://github.com/albanD
2025-10-20 21:14:14 +08:00 · 2024-03-29 16:20:34 +00:00
parent f4ff063c33
commit 3d3d4e1cd5
2 changed files with 34 additions and 12 deletions
--- a/c10/xpu/XPUStream.h
+++ b/c10/xpu/XPUStream.h
@@ -30,21 +30,19 @@ static constexpr int max_compile_time_stream_priorities = 2;

 /*
 * This serves as a wrapper around c10::Stream and acts as a representation for
- * a SYCL queue. On each device, a SYCL queue pool consists of kStreamsPerPool
- * queues, and you can access a particular queue by its index. The index is
- * extracted from XPUStream.id().
+ * a SYCL queue, which allows asynchronous execution of XPU tasks.
 */
 class C10_XPU_API XPUStream {
 public:
  enum Unchecked { UNCHECKED };

-  // Construct a XPUStream from a Stream. This construction is checked, and
-  // will raise an error if the Stream is not, in fact, a XPU stream.
+  /// Construct a XPUStream from a Stream. This construction is checked, and
+  /// will raise an error if the Stream is not, in fact, a XPU stream.
  explicit XPUStream(Stream stream) : stream_(stream) {
    TORCH_CHECK(stream_.device_type() == DeviceType::XPU);
  }

-  // Construct a XPUStream from a Stream with no error checking.
+  /// Construct a XPUStream from a Stream with no error checking.
  explicit XPUStream(Unchecked, Stream stream) : stream_(stream) {}

  bool operator==(const XPUStream& other) const noexcept {
@@ -55,53 +53,70 @@ class C10_XPU_API XPUStream {
    return unwrap() != other.unwrap();
  }

+  /// Implicit conversion to sycl::queue&.
  operator sycl::queue&() const {
    return queue();
  }

+  /// Implicit conversion to Stream (a.k.a., forget that the stream is a
+  /// XPU stream).
  operator Stream() const {
    return unwrap();
  }

+  /// Get the XPU device type that this stream is associated with.
  DeviceType device_type() const {
    return DeviceType::XPU;
  }

+  /// Get the XPU device index that this stream is associated with.
  DeviceIndex device_index() const {
    return stream_.device_index();
  }

+  /// Get the full Device that this stream is associated with. The Device is
+  /// guaranteed to be a XPU device.
  Device device() const {
    return Device(DeviceType::XPU, device_index());
  }

-  // Return the stream ID corresponding to this particular stream. StreamId is
+  /// Return the stream ID corresponding to this particular stream. StreamId is
  /// a int64_t representation generated by its type and index.
  StreamId id() const {
    return stream_.id();
  }

+  /// Return true if all enqueued tasks in this stream have been completed,
+  /// otherwise return false.
  bool query() const {
    return queue().ext_oneapi_empty();
  }

+  /// Performs a blocking wait for the completion of all enqueued tasks in this
+  /// stream.
  void synchronize() const {
    queue().wait_and_throw();
  }

+  /// Return the priority that this stream is associated with. Lower numbers
+  /// represent higher priority.
  int priority() const;

-  // Explicit conversion to sycl::queue&.
+  /// Explicit conversion to sycl::queue&.
  sycl::queue& queue() const;

+  /// Explicit conversion to Stream.
  Stream unwrap() const {
    return stream_;
  }

+  /// Reversibly pack a XPUStream into a struct representation. The XPUStream
+  /// can be unpacked using unpack3().
  struct c10::StreamData3 pack3() const {
    return stream_.pack3();
  }

+  /// Unpack a XPUStream from the 3 fields generated by pack3().
  static XPUStream unpack3(
      StreamId stream_id,
      DeviceIndex device_index,
@@ -109,6 +124,7 @@ class C10_XPU_API XPUStream {
    return XPUStream(Stream::unpack3(stream_id, device_index, device_type));
  }

+  /// Return the range of priorities **supported by PyTorch**.
  static std::tuple<int, int> priority_range() {
    return std::make_tuple(0, -max_compile_time_stream_priorities + 1);
  }
@@ -120,13 +136,18 @@ class C10_XPU_API XPUStream {
 /**
 * Get a stream from the pool in a round-robin fashion.
 *
- * You can request a stream from the high priority pool by setting
- * isHighPriority to true, or a priority value for a specific device by setting
- * device.
+ * You can request a stream from the highest priority pool by setting
+ * isHighPriority to true for a specific device.
 */
 C10_XPU_API XPUStream
 getStreamFromPool(const bool isHighPriority = false, DeviceIndex device = -1);
-// The priority number lower, the priority higher.
+
+/**
+ * Get a stream from the pool in a round-robin fashion.
+ *
+ * You can request a stream by setting a priority value for a specific device.
+ * The lower the priority number, the higher the priority.
+ */
 C10_XPU_API XPUStream
 getStreamFromPool(const int priority, DeviceIndex device = -1);

--- a/docs/cpp/source/Doxyfile
+++ b/docs/cpp/source/Doxyfile
@@ -56,6 +56,7 @@ INPUT                  = ../../../aten/src/ATen/ATen.h \
                         ../../../c10/util/Optional.h \
                         ../../../c10/cuda/CUDAGuard.h \
                         ../../../c10/cuda/CUDAStream.h \
+                         ../../../c10/xpu/XPUStream.h \
                         ../../../torch/csrc/api/include \
                         ../../../torch/csrc/api/src \
                         ../../../torch/csrc/autograd/autograd.h \