Files
pytorch/c10/core/AutogradState.h
soulitzer 4957ae5838 Add API to annotate disjoint backward and handle in AC (#166536)
This adds zero-bubble / DualPipeV support for (S)AC

Before:
- AC will always retrigger recompute upon every distinct backward.

After:
- Any checkpointed regions encountered by backward under the same instance of this context manager will only trigger recompute at most once, even if there are multiple calls to backward.
- Backward calls under the same instance of this context manager must execute over non-overlapping regions of the backward graph even if retain_graph=True.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/166536
Approved by: https://github.com/albanD
2025-11-08 00:21:25 +00:00

86 lines
2.0 KiB
C++

#pragma once
#include <c10/core/SafePyObject.h>
#include <c10/macros/Export.h>
#include <optional>
namespace c10 {
// Structure used to pack all the thread local boolean
// flags used by autograd
struct C10_API AutogradState {
static AutogradState& get_tls_state();
static void set_tls_state(AutogradState state);
AutogradState(
bool grad_mode,
bool inference_mode,
bool fw_grad_mode,
bool multithreading_enabled)
: graph_exec_group_(std::nullopt),
grad_mode_(grad_mode),
inference_mode_(inference_mode),
fw_grad_mode_(fw_grad_mode),
multithreading_enabled_(multithreading_enabled),
view_replay_enabled_(false) {}
void set_grad_mode(bool enabled) {
grad_mode_ = enabled;
}
void set_fw_grad_mode(bool enabled) {
fw_grad_mode_ = enabled;
}
void set_inference_mode(bool enabled) {
inference_mode_ = enabled;
}
void set_multithreading_enabled(bool multithreading_enabled) {
multithreading_enabled_ = multithreading_enabled;
}
void set_view_replay_enabled(bool view_replay_enabled) {
view_replay_enabled_ = view_replay_enabled;
}
void set_graph_exec_group(std::optional<SafePyObject> group) {
graph_exec_group_ = std::move(group);
}
bool get_grad_mode() const {
return grad_mode_;
}
bool get_fw_grad_mode() const {
return fw_grad_mode_;
}
bool get_inference_mode() const {
return inference_mode_;
}
bool get_multithreading_enabled() const {
return multithreading_enabled_;
}
bool get_view_replay_enabled() const {
return view_replay_enabled_;
}
const std::optional<SafePyObject>& get_graph_exec_group() const {
return graph_exec_group_;
}
private:
std::optional<SafePyObject> graph_exec_group_;
bool grad_mode_ : 1;
bool inference_mode_ : 1;
bool fw_grad_mode_ : 1;
bool multithreading_enabled_ : 1;
// NOLINTNEXTLINE(cppcoreguidelines-use-default-member-init)
bool view_replay_enabled_ : 1;
};
} // namespace c10