From 24c904951c9b0c9901b03dcf8b4f92ebf6cec241 Mon Sep 17 00:00:00 2001
From: Ailing Zhang
Date: Fri, 2 Apr 2021 11:43:57 -0700
Subject: [PATCH] Replace AutoNonVariableTypeMode with InferenceMode in
 fbcode. (#55114)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/55114

Test Plan: CI

Reviewed By: ezyang, bhosmer

Differential Revision: D27472768

fbshipit-source-id: 76f17ef7de40f6e04e2968f8958027b5f93e1c0c
---
 aten/src/ATen/ATen.h                               |  1 +
 aten/src/ATen/core/LegacyTypeDispatch.h            |  2 +-
 aten/src/ATen/core/boxing/KernelFunction.cpp       |  2 +-
 .../ATen/native/metal/mpscnn/tests/MPSCNNTests.mm  |  2 +-
 aten/src/ATen/test/vulkan_api_test.cpp             |  8 ++++----
 benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp |  4 ++--
 binaries/compare_models_torch.cc                   |  2 +-
 binaries/lite_interpreter_model_load.cc            |  2 +-
 binaries/speed_benchmark_torch.cc                  | 13 ++++++-------
 ios/TestApp/TestApp/Benchmark.mm                   |  3 +--
 ios/TestApp/TestAppTests/TestAppTests.mm           |  3 +--
 test/custom_operator/test_custom_ops.cpp           |  6 +++---
 test/mobile/custom_build/predictor.cpp             | 10 ++--------
 test/mobile/op_deps/main.cc                        |  3 +--
 14 files changed, 26 insertions(+), 35 deletions(-)

diff --git a/aten/src/ATen/ATen.h b/aten/src/ATen/ATen.h
index 913abf30bde9..d1eb1e7e9d2b 100644
--- a/aten/src/ATen/ATen.h
+++ b/aten/src/ATen/ATen.h
@@ -31,3 +31,4 @@
 #include
 #include
 #include
+#include <c10/core/InferenceMode.h>
diff --git a/aten/src/ATen/core/LegacyTypeDispatch.h b/aten/src/ATen/core/LegacyTypeDispatch.h
index 03ffb68ad9dd..c48bbe3d5ae8 100644
--- a/aten/src/ATen/core/LegacyTypeDispatch.h
+++ b/aten/src/ATen/core/LegacyTypeDispatch.h
@@ -43,7 +43,7 @@ namespace at {
 // trace). To unify the two, we would first have to move profiling and tracing
 // out of VariableType.
 
-// TODO: rename this guard and make it internal only
+// TODO: rename this guard and make it internal for kernel implementation only
 struct TORCH_API AutoNonVariableTypeMode {
   // NB: The enabled parameter must ALWAYS be black, as Henry Ford used to say.
   // TODO: Eliminate this parameter entirely
diff --git a/aten/src/ATen/core/boxing/KernelFunction.cpp b/aten/src/ATen/core/boxing/KernelFunction.cpp
index c494280e52c6..b0bc48f7b256 100644
--- a/aten/src/ATen/core/boxing/KernelFunction.cpp
+++ b/aten/src/ATen/core/boxing/KernelFunction.cpp
@@ -26,7 +26,7 @@ void ambiguous_autogradother_kernel(OperatorKernel*, const OperatorHandle& op, D
     "(see Note [Ambiguity in AutogradOther kernel]). "
     "If you want to override CompositeImplicitAutograd, please open an issue to request a dedicated "
     "Autograd dispatch key for the backend.\n",
-    "If you only want to run inference instead of training, add `at::AutoNonVariableTypeMode guard(true);` "
+    "If you only want to run inference instead of training, add `c10::InferenceMode mode;` "
     "before model.forward(). Note this guard is only available in C++ but not Python at present.",
     "\nCanonical state\n~~~~~~~~~~~\n", op.dumpState(), "\n\n");
 }
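
Note: the message above steers users to the new guard. A minimal sketch of the
suggested inference-only call in C++ (the model path, input shape, and the
surrounding main() are illustrative assumptions, not part of this patch):

    #include <torch/script.h>

    int main() {
      c10::InferenceMode mode;  // replaces at::AutoNonVariableTypeMode guard(true);
      auto module = torch::jit::load("model.pt");  // hypothetical model path
      std::vector<torch::jit::IValue> inputs;
      inputs.push_back(torch::ones({1, 3, 224, 224}));
      at::Tensor output = module.forward(inputs).toTensor();
      return 0;
    }

Like the old guard, InferenceMode is scope-based: autograd is disabled and
newly created tensors are inference tensors only while the guard is alive.
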
diff --git a/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm b/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm
index a732257528ea..aa1894454289 100644
--- a/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm
+++ b/aten/src/ATen/native/metal/mpscnn/tests/MPSCNNTests.mm
@@ -67,7 +67,7 @@ bool TEST(const std::vector<int64_t>& sizes, std::string name, Func block) {
   std::stringstream ss;
   std::copy(sizes.begin(), sizes.end(), std::ostream_iterator<int64_t>(ss, " "));
   __block std::string str1 = ss.str();
-  at::AutoNonVariableTypeMode guard(true);
+  c10::InferenceMode guard;
   bool b = block();
   void (^print)(NSString*) = ^(NSString* result) {
     NSLog(@"[%s],[%s],[%@]", name.c_str(), str1.c_str(), result);
diff --git a/aten/src/ATen/test/vulkan_api_test.cpp b/aten/src/ATen/test/vulkan_api_test.cpp
index 33c43eae525d..9170247efb91 100644
--- a/aten/src/ATen/test/vulkan_api_test.cpp
+++ b/aten/src/ATen/test/vulkan_api_test.cpp
@@ -70,7 +70,7 @@ TEST(VulkanAPITest, adaptive_avg_pool2d) {
   if (!at::is_vulkan_available()) {
     return;
   }
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
+  c10::InferenceMode mode;
 
   const auto in_cpu = at::rand({5, 7, 47, 31}, at::TensorOptions(at::kCPU).dtype(at::kFloat));
   const auto out_cpu = at::adaptive_avg_pool2d(in_cpu, {3, 3});
@@ -1171,7 +1171,7 @@ TEST(VulkanAPITest, reshape) {
   if (!at::is_vulkan_available()) {
     return;
   }
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
+  c10::InferenceMode mode;
 
   const auto in_cpu = at::rand({47, 11, 83, 97}, at::device(at::kCPU).dtype(at::kFloat));
   const auto in_vulkan = in_cpu.vulkan();
@@ -1193,7 +1193,7 @@ TEST(VulkanAPITest, reshape_) {
   if (!at::is_vulkan_available()) {
     return;
   }
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
+  c10::InferenceMode mode;
 
   const auto cpu = at::rand({59, 41, 19, 67}, at::device(at::kCPU).dtype(at::kFloat));
   const auto vulkan = cpu.vulkan();
@@ -1626,7 +1626,7 @@ TEST(VulkanAPITest, mobilenetv2) {
   if (!at::is_vulkan_available()) {
     return;
   }
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
+  c10::InferenceMode mode;
 
   MobileNetV2 mn2;
 
diff --git a/benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp b/benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp
index 1ce66747f2f0..a458f5f8d0d5 100644
--- a/benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp
+++ b/benchmarks/cpp/tensorexpr/bench_fuser_overhead.cpp
@@ -1,6 +1,7 @@
 #include
 #include
 #include
+#include <c10/core/InferenceMode.h>
 
 using namespace torch::jit;
 
@@ -10,8 +11,7 @@ def two_adds(self, x: Tensor, y: Tensor, z: Tensor) -> Tensor:
 )JIT";
 
 static void FusedOverhead(benchmark::State& state) {
-  torch::NoGradGuard ng;
-  torch::AutoNonVariableTypeMode nv;
+  c10::InferenceMode mode;
   overrideCanFuseOnCPU(true);
 
   Module m("m");
diff --git a/binaries/compare_models_torch.cc b/binaries/compare_models_torch.cc
index 9dbce72d0e83..7076626cc044 100644
--- a/binaries/compare_models_torch.cc
+++ b/binaries/compare_models_torch.cc
@@ -224,7 +224,7 @@ int main(int argc, char** argv) {
     float tolerance = 0;
     ss >> tolerance;
 
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
+  c10::InferenceMode mode;
   torch::autograd::AutoGradMode guard(false);
   torch::jit::GraphOptimizerEnabledGuard no_optimizer_guard(false);
   auto module = torch::jit::load(FLAGS_model);
diff --git a/binaries/lite_interpreter_model_load.cc b/binaries/lite_interpreter_model_load.cc
index e82d85b96dba..b525e8fd0e3c 100644
--- a/binaries/lite_interpreter_model_load.cc
+++ b/binaries/lite_interpreter_model_load.cc
@@ -27,7 +27,7 @@ int main(int argc, char** argv) {
 
   // TODO: avoid having to set this guard for custom mobile build with mobile
   // interpreter.
-  torch::AutoNonVariableTypeMode non_var_guard{true};
+  c10::InferenceMode mode;
   torch::jit::mobile::Module bc = torch::jit::_load_for_mobile(FLAGS_model);
   return 0;
 }
diff --git a/binaries/speed_benchmark_torch.cc b/binaries/speed_benchmark_torch.cc
index f8db31436801..e5990560b346 100644
--- a/binaries/speed_benchmark_torch.cc
+++ b/binaries/speed_benchmark_torch.cc
@@ -17,14 +17,14 @@
 #include
 #include
 
-#include "ATen/ATen.h"
+#include <ATen/ATen.h>
 #include "caffe2/core/timer.h"
 #include "caffe2/utils/string_utils.h"
-#include "torch/csrc/autograd/grad_mode.h"
-#include "torch/csrc/jit/serialization/import.h"
-#include "torch/script.h"
+#include <torch/csrc/autograd/grad_mode.h>
+#include <torch/csrc/jit/serialization/import.h>
+#include <torch/script.h>
 
-#include "c10/mobile/CPUCachingAllocator.h"
+#include <c10/mobile/CPUCachingAllocator.h>
 #include
 
 using namespace std::chrono;
@@ -209,8 +209,7 @@ int main(int argc, char** argv) {
 
   std::vector<c10::IValue> inputs = create_inputs();
 
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
-  torch::autograd::AutoGradMode guard(false);
+  c10::InferenceMode mode;
   torch::jit::GraphOptimizerEnabledGuard no_optimizer_guard(false);
   auto module = torch::jit::load(FLAGS_model);
 
diff --git a/ios/TestApp/TestApp/Benchmark.mm b/ios/TestApp/TestApp/Benchmark.mm
index 74e0e3762f12..664bad121031 100644
--- a/ios/TestApp/TestApp/Benchmark.mm
+++ b/ios/TestApp/TestApp/Benchmark.mm
@@ -65,11 +65,10 @@ static int iter = 10;
     }
   }
 
-  torch::autograd::AutoGradMode guard(false);
+  c10::InferenceMode mode;
   torch::jit::GraphOptimizerEnabledGuard opguard(false);
   auto module = torch::jit::load(model);
 
-  at::AutoNonVariableTypeMode non_var_type_mode(true);
   module.eval();
   if (print_output) {
     std::cout << module.forward(inputs) << std::endl;
diff --git a/ios/TestApp/TestAppTests/TestAppTests.mm b/ios/TestApp/TestAppTests/TestAppTests.mm
index 2505439b8aae..818f148c0488 100644
--- a/ios/TestApp/TestAppTests/TestAppTests.mm
+++ b/ios/TestApp/TestAppTests/TestAppTests.mm
@@ -25,10 +25,9 @@
 
 - (void)testForward {
   _module.eval();
+  c10::InferenceMode mode;
   std::vector inputs;
   inputs.push_back(torch::ones({1, 3, 224, 224}, at::ScalarType::Float));
-  torch::autograd::AutoGradMode guard(false);
-  at::AutoNonVariableTypeMode nonVarTypeModeGuard(true);
   auto outputTensor = _module.forward(inputs).toTensor();
   float* outputBuffer = outputTensor.data_ptr<float>();
   XCTAssertTrue(outputBuffer != nullptr, @"");
diff --git a/test/custom_operator/test_custom_ops.cpp b/test/custom_operator/test_custom_ops.cpp
index f49d24af893a..71c90fa902dd 100644
--- a/test/custom_operator/test_custom_ops.cpp
+++ b/test/custom_operator/test_custom_ops.cpp
@@ -80,8 +80,8 @@ void get_autograd_operator_from_registry_and_execute() {
   TORCH_INTERNAL_ASSERT(torch::allclose(z.grad(), torch::ones({5,5})));
 }
 
-void get_autograd_operator_from_registry_and_execute_in_nograd_mode() {
-  at::AutoNonVariableTypeMode _var_guard(true);
+void get_autograd_operator_from_registry_and_execute_in_inference_mode() {
+  c10::InferenceMode guard;
 
   torch::Tensor x = torch::randn({5,5}, torch::requires_grad());
   torch::Tensor y = torch::randn({5,5}, torch::requires_grad());
@@ -185,7 +185,7 @@ int main(int argc, const char* argv[]) {
 
   get_operator_from_registry_and_execute();
   get_autograd_operator_from_registry_and_execute();
-  get_autograd_operator_from_registry_and_execute_in_nograd_mode();
+  get_autograd_operator_from_registry_and_execute_in_inference_mode();
   load_serialized_module_with_custom_op_and_execute(
       path_to_exported_script_module);
   test_argument_checking_for_serialized_modules(path_to_exported_script_module);
diff --git a/test/mobile/custom_build/predictor.cpp b/test/mobile/custom_build/predictor.cpp
index d3d998c3939b..aaae9062f777 100644
--- a/test/mobile/custom_build/predictor.cpp
+++ b/test/mobile/custom_build/predictor.cpp
@@ -11,14 +11,8 @@ using namespace std;
 
 namespace {
 struct MobileCallGuard {
-  // AutoGrad is disabled for mobile by default.
-  torch::autograd::AutoGradMode no_autograd_guard{false};
-  // VariableType dispatch is not included in default mobile build. We need set
-  // this guard globally to avoid dispatch error (only for dynamic dispatch).
-  // Thanks to the unification of Variable class and Tensor class it's no longer
-  // required to toggle the NonVariableTypeMode per op - so it doesn't hurt to
-  // always set NonVariableTypeMode for inference only use case.
-  torch::AutoNonVariableTypeMode non_var_guard{true};
+  // Set InferenceMode for inference only use case.
+  c10::InferenceMode guard;
   // Disable graph optimizer to ensure list of unused ops are not changed for
   // custom mobile build.
   torch::jit::GraphOptimizerEnabledGuard no_optimizer_guard{false};
diff --git a/test/mobile/op_deps/main.cc b/test/mobile/op_deps/main.cc
index d805edd7aba8..99821f39b5e0 100644
--- a/test/mobile/op_deps/main.cc
+++ b/test/mobile/op_deps/main.cc
@@ -4,8 +4,7 @@
 #include "simple_ops.h"
 
 int main() {
-  torch::autograd::AutoGradMode guard(false);
-  at::AutoNonVariableTypeMode non_var_type_mode(true);
+  c10::InferenceMode guard;
   auto input = torch::empty({1, 3, 224, 224});
   at::call_AA_op(input);
   at::call_BB_op(input);
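
The MobileCallGuard change above is the general migration pattern: a single
c10::InferenceMode guard now stands in for the old AutoGradMode(false) plus
AutoNonVariableTypeMode(true) pair. A minimal sketch of an inference-only
entry point built the same way (InferenceCallGuard and run_model are
hypothetical names, not from this patch):

    #include <torch/script.h>

    namespace {
    // Mirrors MobileCallGuard: scope all inference-only guards together.
    struct InferenceCallGuard {
      c10::InferenceMode guard;  // autograd off, new tensors are inference tensors
      torch::jit::GraphOptimizerEnabledGuard no_optimizer_guard{false};
    };
    } // namespace

    at::Tensor run_model(torch::jit::script::Module& module,
                         const at::Tensor& input) {
      InferenceCallGuard guard;  // both modes hold for the duration of the call
      return module.forward({input}).toTensor();
    }

Both members are RAII guards, so the prior thread-local modes are restored
when run_model returns.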