[caffe2] Fix compiling ATen-hip in non-opt mode (#132581)
Summary: It looks like https://github.com/pytorch/pytorch/pull/131894 accidentally broke non-opt HIP builds: `is_flash_attention_available` doesn't get inlined in non-opt mode, so all of `can_use_flash_attention` is compiled into the final object file. This includes a reference to `aotriton::v2::flash::check_gpu`, which we haven't set up yet for HIP builds.

Test Plan: CI

Differential Revision: D60720707

Pull Request resolved: https://github.com/pytorch/pytorch/pull/132581
Approved by: https://github.com/jianyuh, https://github.com/xw285cornell
committed by PyTorch MergeBot
parent 522fa03e91
commit 2714adce20
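To illustrate the failure mode described in the summary, here is a minimal, self-contained C++ sketch (not the PyTorch source; `USE_FEATURE`, `feature_available()` and `check_feature_gpu()` are hypothetical stand-ins for `USE_FLASH_ATTENTION`, `is_flash_attention_available()` and `aotriton::v2::flash::check_gpu`). A runtime guard only keeps the unresolved symbol out of the object file if the optimizer inlines the guard and deletes the dead call; the preprocessor guard, which the diff below restores, removes the reference unconditionally:

// sketch.cpp -- minimal illustration, NOT the PyTorch source.
// USE_FEATURE, feature_available() and check_feature_gpu() are hypothetical
// stand-ins for USE_FLASH_ATTENTION, is_flash_attention_available() and
// aotriton::v2::flash::check_gpu().
//
//   g++ -O0 -DUSE_FEATURE sketch.cpp   // builds and links
//   g++ -O2               sketch.cpp   // links: the dead call is typically optimized away
//   g++ -O0               sketch.cpp   // fails: undefined reference to check_feature_gpu()
#include <iostream>

bool check_feature_gpu();  // only defined when the feature is compiled in
#ifdef USE_FEATURE
bool check_feature_gpu() { return true; }
#endif

bool feature_available() {
#ifdef USE_FEATURE
  return true;
#else
  return false;
#endif
}

// Runtime guard (the pattern this commit removes): behaves correctly at
// runtime, but unless the compiler inlines feature_available() and folds the
// branch away, the object file still contains a relocation against
// check_feature_gpu() -- hence the link error in a non-opt build.
bool can_use_feature_runtime_guard() {
  if (!feature_available()) {
    return false;
  }
  return check_feature_gpu();
}

// Preprocessor guard (the pattern this commit restores): when USE_FEATURE is
// off, the call is never emitted, so no symbol needs to be resolved.
bool can_use_feature_preprocessor_guard() {
#ifndef USE_FEATURE
  return false;
#else
  return check_feature_gpu();
#endif
}

int main() {
  std::cout << can_use_feature_runtime_guard() << ' '
            << can_use_feature_preprocessor_guard() << '\n';
  return 0;
}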
@@ -570,13 +570,12 @@ bool is_flash_attention_available() {
 }
 
 bool can_use_flash_attention(sdp_params const& params, bool debug) {
-  if (!is_flash_attention_available()) {
-    if (debug) {
-      TORCH_WARN("Torch was not compiled with flash attention.");
-    }
-    return false;
-  }
-
+#ifndef USE_FLASH_ATTENTION
+  if (debug) {
+    TORCH_WARN("Torch was not compiled with flash attention.");
+  }
+  return false;
+#else // defined(USE_FLASH_ATTENTION)
   // Define gate functions that determine if a flash kernel can be ran
   // Replace with std::to_array when we migrate to c++20
   constexpr auto general_constraints = array_of<bool (*)(sdp_params const&, bool)>(
@@ -618,6 +617,7 @@ bool can_use_flash_attention(sdp_params const& params, bool debug) {
     }
   }
   return true;
+#endif // defined(USE_FLASH_ATTENTION)
 }
 
 bool can_use_mem_efficient_attention(sdp_params const& params, bool debug) {