[caffe2] Fix compiling ATen-hip in non-opt mode (#132581)

Summary:
It looks like https://github.com/pytorch/pytorch/pull/131894 accidentally broke non-opt HIP builds: `is_flash_attention_available` doesn't get inlined in non-opt mode, so all of `can_use_flash_attention` is compiled into the final object file, including a reference to `aotriton::v2::flash::check_gpu`, which we haven't set up yet for HIP builds.
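
To illustrate the failure mode, here is a minimal standalone sketch; `backend_check_gpu`, `backend_available`, and `HYPOTHETICAL_USE_BACKEND` are made-up stand-ins for the ATen/aotriton identifiers, not real APIs. With a runtime gate, the guarded call is still compiled; at -O0 the availability check is not inlined and the branch is not folded away, so the object file retains a reference to the external symbol. A preprocessor gate removes that code before compilation, regardless of optimization level.

```cpp
// Sketch only: hypothetical names, not the actual ATen or aotriton identifiers.
#include <iostream>

// In the real code the analogous symbol (aotriton::v2::flash::check_gpu) lives
// in a library that isn't set up for HIP yet; it is defined here only so the
// sketch links and runs.
bool backend_check_gpu() { return true; }

inline bool backend_available() {
#ifdef HYPOTHETICAL_USE_BACKEND
  return true;
#else
  return false;
#endif
}

// Runtime gate: correct at runtime, but the call to backend_check_gpu() is
// still compiled. At -O0 nothing is inlined or folded away, so the object
// file keeps a reference to the symbol even when the backend is disabled.
bool can_use_backend_runtime_gate() {
  if (!backend_available()) {
    return false;
  }
  return backend_check_gpu();
}

// Preprocessor gate (the approach taken here): when the macro is undefined,
// the call is stripped before compilation, so no reference is emitted at any
// optimization level.
bool can_use_backend_preprocessor_gate() {
#ifndef HYPOTHETICAL_USE_BACKEND
  return false;
#else
  return backend_check_gpu();
#endif
}

int main() {
  std::cout << std::boolalpha
            << "runtime gate: " << can_use_backend_runtime_gate() << '\n'
            << "preprocessor gate: " << can_use_backend_preprocessor_gate() << '\n';
  return 0;
}
```

Since `is_flash_attention_available` reduces to the same `USE_FLASH_ATTENTION` check, guarding the body of `can_use_flash_attention` directly preserves behavior while keeping the aotriton reference out of non-opt HIP object files.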

Test Plan:
CI

Differential Revision: D60720707

Pull Request resolved: https://github.com/pytorch/pytorch/pull/132581
Approved by: https://github.com/jianyuh, https://github.com/xw285cornell
Author: Dan Zimmerman
Date: 2024-08-04 07:51:18 +00:00
Committer: PyTorch MergeBot
Parent: 522fa03e91
Commit: 2714adce20

@@ -570,13 +570,12 @@ bool is_flash_attention_available() {
 }
 bool can_use_flash_attention(sdp_params const& params, bool debug) {
-  if (!is_flash_attention_available()) {
-    if (debug) {
-      TORCH_WARN("Torch was not compiled with flash attention.");
-    }
-    return false;
+#ifndef USE_FLASH_ATTENTION
+  if (debug) {
+    TORCH_WARN("Torch was not compiled with flash attention.");
   }
+  return false;
+#else // defined(USE_FLASH_ATTENTION)
   // Define gate functions that determine if a flash kernel can be ran
   // Replace with std::to_array when we migrate to c++20
   constexpr auto general_constraints = array_of<bool (*)(sdp_params const&, bool)>(
@@ -618,6 +617,7 @@ bool can_use_flash_attention(sdp_params const& params, bool debug) {
     }
   }
   return true;
+#endif // defined(USE_FLASH_ATTENTION)
 }
 bool can_use_mem_efficient_attention(sdp_params const& params, bool debug) {