Eliminate c10::optional usage in PyTorch (#144346)

Differential Revision: D67907427

Pull Request resolved: https://github.com/pytorch/pytorch/pull/144346
Approved by: https://github.com/hl475
This commit is contained in:
Lu Fang
2025-01-08 01:14:04 +00:00
committed by PyTorch MergeBot
parent f002825e1e
commit 013c796b1e
5 changed files with 20 additions and 20 deletions

View File

@ -39,7 +39,7 @@ fmha_bwd_args get_ck_fmha_bwd_args(const mask_info &mask,
const at::Tensor q,
const at::Tensor k,
const at::Tensor v,
c10::optional<at::Tensor> &alibi_slopes_,
std::optional<at::Tensor> &alibi_slopes_,
const at::Tensor out,
const at::Tensor softmax_lse,
const at::Tensor dout,
@ -201,10 +201,10 @@ mha_bwd_ck(const at::Tensor &dout, // batch_size x seqlen_q x
const at::Tensor &v, // batch_size x seqlen_k x num_heads_k x head_size
const at::Tensor &out, // batch_size x seqlen_q x num_heads x head_size
const at::Tensor &softmax_lse, // b x h x seqlen_q
c10::optional<at::Tensor> &dq_, // batch_size x seqlen_q x num_heads x head_size
c10::optional<at::Tensor> &dk_, // batch_size x seqlen_k x num_heads_k x head_size
c10::optional<at::Tensor> &dv_, // batch_size x seqlen_k x num_heads_k x head_size
c10::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
std::optional<at::Tensor> &dq_, // batch_size x seqlen_q x num_heads x head_size
std::optional<at::Tensor> &dk_, // batch_size x seqlen_k x num_heads_k x head_size
std::optional<at::Tensor> &dv_, // batch_size x seqlen_k x num_heads_k x head_size
std::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
const float p_dropout, // probability to drop
const float softmax_scale,
const bool is_causal,

View File

@ -146,15 +146,15 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tenso
mha_fwd_ck(const at::Tensor &q, // batch_size x seqlen_q x num_heads x head_size
const at::Tensor &k, // batch_size x seqlen_k x num_heads_k x head_size
const at::Tensor &v, // batch_size x seqlen_k x num_heads_k x head_size
c10::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
c10::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
std::optional<at::Tensor> &out_, // batch_size x seqlen_q x num_heads x head_size
std::optional<at::Tensor> &alibi_slopes_, // num_heads or batch_size x num_heads
const float p_dropout,
const float softmax_scale,
bool is_causal,
int window_size_left,
int window_size_right,
const bool return_dropout_randval,
c10::optional<at::Generator> gen_)
std::optional<at::Generator> gen_)
{
auto q_dtype = q.dtype();
TORCH_CHECK(q_dtype == at::kHalf || q_dtype == at::kBFloat16,

View File

@ -43,7 +43,7 @@ fmha_bwd_args get_ck_fmha_varlen_bwd_args(const mask_info &mask,
const at::Tensor v,
const at::Tensor seqlens_q,
const at::Tensor seqlens_k,
c10::optional<at::Tensor> &alibi_slopes_,
std::optional<at::Tensor> &alibi_slopes_,
const at::Tensor out,
const at::Tensor softmax_lse,
const at::Tensor dout,
@ -209,12 +209,12 @@ mha_varlen_bwd_ck(const at::Tensor &dout, // total_q x num_hea
const at::Tensor &v, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i
const at::Tensor &out, // total_q x num_heads x head_size
const at::Tensor &softmax_lse, // b x h x s softmax logsumexp
c10::optional<at::Tensor> &dq_, // total_q x num_heads x head_size, total_q := \sum_{i=0}^{b} s_i
c10::optional<at::Tensor> &dk_, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i
c10::optional<at::Tensor> &dv_, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i
std::optional<at::Tensor> &dq_, // total_q x num_heads x head_size, total_q := \sum_{i=0}^{b} s_i
std::optional<at::Tensor> &dk_, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i
std::optional<at::Tensor> &dv_, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i
const at::Tensor &cu_seqlens_q, // b+1
const at::Tensor &cu_seqlens_k, // b+1
c10::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
std::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
const int max_seqlen_q,
const int max_seqlen_k, // max sequence length to choose the kernel
const float p_dropout, // probability to drop

View File

@ -42,7 +42,7 @@ fmha_fwd_args get_ck_fmha_varlen_fwd_args(bool has_lse,
const at::Tensor v,
const at::Tensor seqlens_q,
const at::Tensor seqlens_k,
c10::optional<at::Tensor> &alibi_slopes_,
std::optional<at::Tensor> &alibi_slopes_,
at::Tensor out,
at::Tensor softmax_lse,
at::Tensor dropout_randval,
@ -149,11 +149,11 @@ std::tuple<at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tensor, at::Tenso
mha_varlen_fwd_ck(const at::Tensor &q, // total_q x num_heads x head_size, total_q := \sum_{i=0}^{b} s_i
const at::Tensor &k, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i or num_blocks x page_block_size x num_heads_k x head_size if there's a block_table.
const at::Tensor &v, // total_k x num_heads_k x head_size, total_k := \sum_{i=0}^{b} s_i or num_blocks x page_block_size x num_heads_k x head_size if there's a block_table.
c10::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_q := \sum_{i=0}^{b} s_i
std::optional<at::Tensor> &out_, // total_q x num_heads x head_size, total_q := \sum_{i=0}^{b} s_i
const at::Tensor &cu_seqlens_q, // b+1
const at::Tensor &cu_seqlens_k, // b+1
c10::optional<at::Tensor> & /*seqused_k*/,
c10::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
std::optional<at::Tensor> & /*seqused_k*/,
std::optional<at::Tensor> &alibi_slopes_, // num_heads or b x num_heads
int max_seqlen_q,
const int max_seqlen_k,
const float p_dropout,
@ -163,7 +163,7 @@ mha_varlen_fwd_ck(const at::Tensor &q, // total_q x num_heads
int window_size_left,
int window_size_right,
const bool return_dropout_randval,
c10::optional<at::Generator> gen_)
std::optional<at::Generator> gen_)
{
auto q_dtype = q.dtype();
TORCH_CHECK(q_dtype == at::kHalf || q_dtype == at::kBFloat16,

View File

@ -532,7 +532,7 @@
"7000 build/../c10/core/ScalarType.h:c10::TensorOptions::dtype(std::optional<c10::ScalarType>) const [clone .isra.469]",
"7000 build/../c10/core/StorageImpl.h:c10::TensorImpl::release_resources()",
"7000 build/../c10/core/TensorImpl.cpp:c10::TensorImpl::TensorImpl(c10::Storage&&, c10::DispatchKeySet, caffe2::TypeMeta const&) [/data/users/test_user/repos/pytorch/torch/lib/libc10.so]",
"7000 build/../c10/core/impl/VirtualGuardImpl.h:c10::optional_base<c10::impl::InlineDeviceGuard<c10::impl::VirtualGuardImpl> >::~optional_base()",
"7000 build/../c10/core/impl/VirtualGuardImpl.h:std::optional_base<c10::impl::InlineDeviceGuard<c10::impl::VirtualGuardImpl> >::~optional_base()",
"7000 build/../c10/util/intrusive_ptr.h:torch::autograd::utils::wrap(at::Tensor)",
"7000 build/../c10/util/llvmMathExtras.h:at::Tensor::fill_(c10::Scalar) const",
"7000 build/../c10/util/llvmMathExtras.h:at::Tensor::is_complex() const",
@ -1059,7 +1059,7 @@
"7000 build/../c10/core/ScalarType.h:c10::TensorOptions::dtype(std::optional<c10::ScalarType>) const [clone .isra.469]",
"7000 build/../c10/core/StorageImpl.h:c10::TensorImpl::release_resources()",
"7000 build/../c10/core/TensorImpl.cpp:c10::TensorImpl::TensorImpl(c10::Storage&&, c10::DispatchKeySet, caffe2::TypeMeta const&) [/data/users/test_user/repos/pytorch/torch/lib/libc10.so]",
"7000 build/../c10/core/impl/VirtualGuardImpl.h:c10::optional_base<c10::impl::InlineDeviceGuard<c10::impl::VirtualGuardImpl> >::~optional_base()",
"7000 build/../c10/core/impl/VirtualGuardImpl.h:std::optional_base<c10::impl::InlineDeviceGuard<c10::impl::VirtualGuardImpl> >::~optional_base()",
"7000 build/../c10/util/intrusive_ptr.h:torch::autograd::utils::wrap(at::Tensor)",
"7000 build/../c10/util/llvmMathExtras.h:at::Tensor::fill_(c10::Scalar) const",
"7000 build/../c10/util/llvmMathExtras.h:at::Tensor::is_complex() const",