mirror of
https://github.com/pytorch/pytorch.git
synced 2025-11-13 10:55:01 +08:00
This adds zero-bubble / DualPipeV support for (S)AC. Before: AC will always retrigger recompute upon every distinct backward. After: (1) any checkpointed regions encountered by backward under the same instance of this context manager will only trigger recompute at most once, even if there are multiple calls to backward; (2) backward calls under the same instance of this context manager must execute over non-overlapping regions of the backward graph, even if retain_graph=True. Pull Request resolved: https://github.com/pytorch/pytorch/pull/166536. Approved by: https://github.com/albanD
86 lines
2.0 KiB
C++
86 lines
2.0 KiB
C++
#pragma once
|
|
|
|
#include <c10/core/SafePyObject.h>
|
|
#include <c10/macros/Export.h>
|
|
#include <optional>
|
|
|
|
namespace c10 {
|
|
|
|
// Structure used to pack all the thread local boolean
|
|
// flags used by autograd
|
|
struct C10_API AutogradState {
|
|
static AutogradState& get_tls_state();
|
|
static void set_tls_state(AutogradState state);
|
|
|
|
AutogradState(
|
|
bool grad_mode,
|
|
bool inference_mode,
|
|
bool fw_grad_mode,
|
|
bool multithreading_enabled)
|
|
: graph_exec_group_(std::nullopt),
|
|
grad_mode_(grad_mode),
|
|
inference_mode_(inference_mode),
|
|
fw_grad_mode_(fw_grad_mode),
|
|
multithreading_enabled_(multithreading_enabled),
|
|
view_replay_enabled_(false) {}
|
|
|
|
void set_grad_mode(bool enabled) {
|
|
grad_mode_ = enabled;
|
|
}
|
|
|
|
void set_fw_grad_mode(bool enabled) {
|
|
fw_grad_mode_ = enabled;
|
|
}
|
|
|
|
void set_inference_mode(bool enabled) {
|
|
inference_mode_ = enabled;
|
|
}
|
|
|
|
void set_multithreading_enabled(bool multithreading_enabled) {
|
|
multithreading_enabled_ = multithreading_enabled;
|
|
}
|
|
|
|
void set_view_replay_enabled(bool view_replay_enabled) {
|
|
view_replay_enabled_ = view_replay_enabled;
|
|
}
|
|
|
|
void set_graph_exec_group(std::optional<SafePyObject> group) {
|
|
graph_exec_group_ = std::move(group);
|
|
}
|
|
|
|
bool get_grad_mode() const {
|
|
return grad_mode_;
|
|
}
|
|
|
|
bool get_fw_grad_mode() const {
|
|
return fw_grad_mode_;
|
|
}
|
|
|
|
bool get_inference_mode() const {
|
|
return inference_mode_;
|
|
}
|
|
|
|
bool get_multithreading_enabled() const {
|
|
return multithreading_enabled_;
|
|
}
|
|
|
|
bool get_view_replay_enabled() const {
|
|
return view_replay_enabled_;
|
|
}
|
|
|
|
const std::optional<SafePyObject>& get_graph_exec_group() const {
|
|
return graph_exec_group_;
|
|
}
|
|
|
|
private:
|
|
std::optional<SafePyObject> graph_exec_group_;
|
|
bool grad_mode_ : 1;
|
|
bool inference_mode_ : 1;
|
|
bool fw_grad_mode_ : 1;
|
|
bool multithreading_enabled_ : 1;
|
|
// NOLINTNEXTLINE(cppcoreguidelines-use-default-member-init)
|
|
bool view_replay_enabled_ : 1;
|
|
};
|
|
|
|
} // namespace c10
|