[compiled autograd] match eager behavior for inplace detached activations (#134186)

Fixes `TestAutograd.test_saved_variable_saved_original_inplace_detach` when run under compiled autograd
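
For context, a rough sketch of the eager pattern the test exercises, run under compiled autograd. This is not the exact test body; `compiler_fn` and the use of `torch._dynamo.compiled_autograd.enable` are placeholders for however the test harness wraps backward:

```python
import torch
from torch._dynamo import compiled_autograd

def compiler_fn(gm):
    # Placeholder backend; the test suite uses its own compiler_fn.
    return torch.compile(gm, backend="eager")

# sin() saves its *input* for backward. Detaching that input in-place
# invalidates the saved tensor, so eager autograd raises a RuntimeError
# when backward tries to unpack it; compiled autograd should now do the same.
a = torch.tensor(1.0, requires_grad=True).clone()
b = a.sin()
a.detach_()

with compiled_autograd.enable(compiler_fn):
    try:
        b.backward()
    except RuntimeError:
        pass  # expected, matching eager

# Tensors saved as *outputs* (e.g. exp() saving its result) take a different
# unpack path in eager, which is why the collect() calls in the diff below now
# carry an explicit is_output flag.
```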

Pull Request resolved: https://github.com/pytorch/pytorch/pull/134186
Approved by: https://github.com/jansel
Author: Simon Fan
Date:   2024-08-21 17:55:33 -07:00
Committed by: PyTorch MergeBot
Parent: ccafc93be5
Commit: 0d9208a398

5 changed files with 36 additions and 12 deletions

View File

@@ -2483,13 +2483,14 @@ skipped_tests = {
}
known_failing_tests = {
"test_current_graph_task_execution_order", # torch._dynamo.exc.TorchRuntimeError: Failed running call_function <
"test_input_buffer_accum", # RuntimeError: Cannot access data pointer of Tensor that doesn't have storage
"test_graph_save_on_cpu_cuda", # AssertionError: 0 not greater than 0
"test_graph_save_on_cpu", # torch._dynamo.exc.BackendCompilerFailed: backend='inner_compiler' raised:
"test_reentrant_with_leaf_variable_hook", # torch._dynamo.exc.Unsupported: inline in skipfiles: RemovableHandle.
"test_reentrant_with_non_leaf_variable_hook", # torch._dynamo.exc.Unsupported: inline in skipfiles: RemovableHan
"test_saved_variable_saved_original_inplace_detach", # AssertionError: RuntimeError not raised
# Category: Compiled autograd
"test_current_graph_task_execution_order", # nodes are already freed by the time dynamo traces the lifted hook
"test_reentrant_with_leaf_variable_hook", # hangs when enabled with graph breaks
"test_reentrant_with_non_leaf_variable_hook", # hangs when enabled with graph breaks
# Category: Inductor
"test_input_buffer_accum", # does not support sparse_grad=True: https://github.com/pytorch/pytorch/issues/120267
"test_graph_save_on_cpu", # does not support pin_memory: https://github.com/pytorch/pytorch/issues/134173
# Uncategorized
"test_saving_variable_to_disk", # Cannot call numel() on tensor with symbolic sizes/strides
"test_setitem_mask", # torch.fx.experimental.symbolic_shapes.GuardOnDataDependentSymNode: It appears that you're
"test_wrapped_number_saved_variable_hooks", # RuntimeError: this hook should not be called

View File

@@ -566,6 +566,7 @@ def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str
should_append_getsetdef = True
should_append_raw_getsetdef = False
visit_name = name
uses_cpp_saved_variable_cls = False
if (
type == BaseCType(tensorT)
@@ -573,6 +574,7 @@ def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str
or type == MutRefCType(OptionalCType(BaseCType(tensorT)))
or (type == BaseCType(scalarT) and is_output)
):
uses_cpp_saved_variable_cls = True
saved_variables.append(f"SavedVariable {name}_;")
release_variables.append(f"{name}_.reset_data();")
ptr = "shared_from_this()" if is_output else ""
@@ -606,6 +608,7 @@ def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str
assert (
info.func.func.name.name.base.startswith("_foreach") and is_output
)
uses_cpp_saved_variable_cls = True
saved_variables.append(f"std::vector<SavedVariable> {name}_;")
saved_variables.append(f"bool {name}_released_ = false;")
# Just clear() is sufficient, we don't need to loop and clear each variable.
@@ -628,6 +631,7 @@ def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str
should_append_raw_getsetdef = True
visit_name = f"{name}_"
elif type == ListCType(OptionalCType(BaseCType(tensorT))):
uses_cpp_saved_variable_cls = True
saved_variables.append(f"std::vector<SavedVariable> {name}_;")
saved_variables.append(f"bool {name}_released_ = false;")
# Just clear() is sufficient, we don't need to loop and clear each variable.
@@ -790,7 +794,12 @@ PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
PY_RAW_GETSETDEF_STRUCT.substitute(op=info.op, name=name)
)
compiled_args.append(f"args.collect({visit_name});")
if uses_cpp_saved_variable_cls:
compiled_args.append(
f"args.collect({visit_name}, {'true' if is_output else 'false'});"
)
else:
compiled_args.append(f"args.collect({visit_name});")
apply_with_saved_before.append(f"saved.before({visit_name});")
apply_with_saved_after.append(f"saved.after({visit_name});")
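
A minimal sketch mirroring the branch above, to make the emitted lines concrete; the field names `self_`, `result_`, and `dim` are hypothetical examples, not taken from any particular generated node:

```python
def emit_collect(visit_name: str, uses_cpp_saved_variable_cls: bool, is_output: bool) -> str:
    # Mirrors the codegen branch: SavedVariable fields now pass an explicit
    # is_output flag through to args.collect(); other fields are unchanged.
    if uses_cpp_saved_variable_cls:
        return f"args.collect({visit_name}, {'true' if is_output else 'false'});"
    return f"args.collect({visit_name});"

assert emit_collect("self_", True, False) == "args.collect(self_, false);"
assert emit_collect("result_", True, True) == "args.collect(result_, true);"
assert emit_collect("dim", False, False) == "args.collect(dim);"
```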

View File

@@ -206,7 +206,8 @@ struct CppNode : public Node {
args.collect(ctx_.saved_data);
TORCH_INTERNAL_ASSERT(ctx_.non_differentiable_.empty());
TORCH_INTERNAL_ASSERT(ctx_.dirty_inputs_.empty());
args.collect(ctx_.saved_variables_);
args.collect(
ctx_.saved_variables_, true); // always unpacked as output in eager
TORCH_INTERNAL_ASSERT(ctx_.to_save_.empty());
args.collect(ctx_.materialize_grads_);
args.collect(ctx_.has_freed_buffers_);

View File

@@ -330,7 +330,7 @@ void PyNode::compiled_args(CompiledNodeArgs& args) {
args.collect(f->compiled_autograd_symints);
args.set_default_dyn_type(prior);
args.collect(f->saved_variables);
args.collect(f->saved_variables, true); // always unpacked as output in eager
args.collect(f->materialize_grads);
args.collect(f->is_variable_input);
args.collect(f->needs_input_grad);

View File

@@ -229,12 +229,19 @@ class CompiledNodeArgs {
void collect(const at::Tensor& t) {
collect(_compiler.tensor_args.add(t));
}
void collect(const SavedVariable& t) {
collect(_compiler.tensor_args.add(t, _node_call.node));
void collect(const SavedVariable& sv, bool is_output) {
collect(
_compiler.tensor_args.add(sv, is_output ? _node_call.node : nullptr));
}
void collect(const c10::SymInt& t) {
_compiler.add_size_input(t);
}
void collect(const std::vector<SavedVariable>& t, bool is_output) {
collect_size(t.size());
for (const SavedVariable& i : t) {
collect(i, is_output);
}
}
template <typename T>
void collect(const std::vector<T>& t) {
collect_size(t.size());
@@ -242,6 +249,12 @@ class CompiledNodeArgs {
collect(i);
}
}
void collect(const c10::ArrayRef<SavedVariable>& t, bool is_output) {
collect_size(t.size());
for (const SavedVariable& i : t) {
collect(i, is_output);
}
}
template <typename T>
void collect(const c10::ArrayRef<T>& t) {
collect_size(t.size());