Rename DefaultBackend to CompositeExplicitAutograd (#54470)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/54470

```
git grep -l 'DefaultBackend' | xargs sed -i 's/DefaultBackend/CompositeExplicitAutograd/g'
```

Plus a quick fixup in native/README.md

Signed-off-by: Edward Z. Yang <ezyang@fb.com>

Test Plan: Imported from OSS

Reviewed By: bdhirsh

Differential Revision: D27253240

Pulled By: ezyang

fbshipit-source-id: 964df951ea8b52fa72937f3cc66aeaf49a702e6f
Committed by: Facebook GitHub Bot
Parent: 70dd2a2bdd
Commit: 13b1ca9466
@@ -132,7 +132,7 @@ genrule(
 "aten/src/ATen/RegisterSparseCPU.cpp",
 "aten/src/ATen/RegisterCompositeImplicitAutograd.cpp",
 "aten/src/ATen/RegisterMeta.cpp",
-"aten/src/ATen/RegisterDefaultBackend.cpp",
+"aten/src/ATen/RegisterCompositeExplicitAutograd.cpp",
 "aten/src/ATen/RegisterSchema.cpp",
 "aten/src/ATen/CPUFunctions.h",
 "aten/src/ATen/CUDAFunctions.h",
@@ -183,16 +183,16 @@ std::pair<const AnnotatedKernel&, const char*> OperatorEntry::computeDispatchTab
 // For any dispatch key, it'll pick a kernel using the following order:
 // (1) Use kernel if it's directly registered to this key
 // (2) Handle runtime keys that have kernels available from alias keys
-// (2.1) Use kernel from DispatchKey::DefaultBackend if available.
+// (2.1) Use kernel from DispatchKey::CompositeExplicitAutograd if available.
 // This is used to register a kernel that works for all backend in inference. But it requires
 // separate registration for Autograd keys to support training.
 // (2.2) Use kernel from DispatchKey::CompositeImplicitAutograd if available.
 // For autograd keys, we only use kernel from CompositeImplicitAutograd when there's no direct registration
-// to its corresponding backend key or DefaultBackend. See Note [DefaultBackend and CompositeImplicitAutograd].
+// to its corresponding backend key or CompositeExplicitAutograd. See Note [CompositeExplicitAutograd and CompositeImplicitAutograd].
 // For AutogradOther, we eagerly return ambiguousAutogradOtherKernel_ if there's registration to any of
 // its backends and ask backend extender to request a decicated Autograd key for the backend.
 // See Note [Ambiguity in AutogradOther kernel] for more details.
-// A DefaultBackend kernel prevents CompositeImplicitAutograd kernel being used for Autograd keys, but it doesn't
+// A CompositeExplicitAutograd kernel prevents CompositeImplicitAutograd kernel being used for Autograd keys, but it doesn't
 // cause confusion for AutogradOther. It's pretty straightforward to use Autograd (if available)
 // in this case.
 // (2.3) Use kernel from DispatchKey::Autograd if available
@@ -201,11 +201,11 @@ std::pair<const AnnotatedKernel&, const char*> OperatorEntry::computeDispatchTab
 // backend key. See Note [Refresh Runtime Autograd entries in dispatchTable_]
 // (3) Use fallthrough kernel that are registered as fallback.
 // Alias Key Precedence:
-// DefaultBackend > CompositeImplicitAutograd > Autograd
-// Note [DefaultBackend and CompositeImplicitAutograd]
-// When there're registrations to both DefaultBackend & CompositeImplicitAutograd & Autograd, from (2.2) we know DefaultBackend
+// CompositeExplicitAutograd > CompositeImplicitAutograd > Autograd
+// Note [CompositeExplicitAutograd and CompositeImplicitAutograd]
+// When there're registrations to both CompositeExplicitAutograd & CompositeImplicitAutograd & Autograd, from (2.2) we know CompositeExplicitAutograd
 // and Autograd kernels will be picked up and CompositeImplicitAutograd is overriden.
-// This is fine and in practice DefaultBackend and CompositeImplicitAutograd shouldn't co-exist for an op.
+// This is fine and in practice CompositeExplicitAutograd and CompositeImplicitAutograd shouldn't co-exist for an op.
 // TODO: Update alias key precedence after we add new alias keys AutogradDispatchCPUOrCUDA .

 // 1. Operator registration
@@ -213,21 +213,21 @@ std::pair<const AnnotatedKernel&, const char*> OperatorEntry::computeDispatchTab
 return {*direct_registration.value(), "kernel"};
 }

-// 2.1 Use DefaultBackend kernel if available.
+// 2.1 Use CompositeExplicitAutograd kernel if available.
 // See Note [Undefined in dispatchTable_] for the special handling for Undefined.
-if (dispatch_key == DispatchKey::Undefined || isIncludedInAlias(dispatch_key, DispatchKey::DefaultBackend)) {
-if (auto default_backend_registration = getKernelForDispatchKey(DispatchKey::DefaultBackend)) {
+if (dispatch_key == DispatchKey::Undefined || isIncludedInAlias(dispatch_key, DispatchKey::CompositeExplicitAutograd)) {
+if (auto default_backend_registration = getKernelForDispatchKey(DispatchKey::CompositeExplicitAutograd)) {
 return {*default_backend_registration.value(), "default backend kernel"};
 }
 }

-// Note when there's direct registration to DefaultBackend, this code path will only be hit by
+// Note when there's direct registration to CompositeExplicitAutograd, this code path will only be hit by
 // non backend keys (e.g AutogradXXX, Batched etc) due to (2.1).
 bool has_backend_kernel =
-hasKernelForAnyDispatchKey(getBackendKeySetFromAutograd(dispatch_key).add(DispatchKey::DefaultBackend));
+hasKernelForAnyDispatchKey(getBackendKeySetFromAutograd(dispatch_key).add(DispatchKey::CompositeExplicitAutograd));

 // 2.2. Use CompositeImplicitAutograd kernel if available. For autograd keys, we only use kernel from CompositeImplicitAutograd
-// when there's no direct registration to its corresponding backend key or DefaultBackend.
+// when there's no direct registration to its corresponding backend key or CompositeExplicitAutograd.
 // For AutogradOther, we return ambiguousAutogradOtherKernel_ if there's registration
 // to any of its backends.
 // See Note [Undefined in dispatchTable_] for the special handling for Undefined.
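The precedence described above can be previewed from Python. A minimal sketch, assuming a PyTorch build that includes this change, using the PythonDispatcher that also appears in the test changes later in this diff; the printed table should show unregistered backends such as QuantizedCPU picking up the CompositeExplicitAutograd entry as the default backend kernel, while Autograd keys still need their own registration:

from torch._python_dispatcher import PythonDispatcher

dispatcher = PythonDispatcher()
# Same registration pattern as test_defaultbackend_autogradcpu in this diff:
# per-backend CPU/XLA kernels, one CompositeExplicitAutograd kernel, and an
# AutogradCPU kernel.
dispatcher.register(["CPU", "XLA", "CompositeExplicitAutograd", "AutogradCPU"])
print(dispatcher.dispatchTable())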
@@ -286,9 +286,9 @@ void OperatorEntry::updateDispatchTable_(const c10::Dispatcher& dispatcher, Disp
 for (auto k : c10::getRuntimeDispatchKeySet(dispatch_key)) {
 updateDispatchTableEntry_(dispatcher, k);
 }
-// Registration to DefaultBackend and CompositeImplicitAutograd should be populated to Undefined.
+// Registration to CompositeExplicitAutograd and CompositeImplicitAutograd should be populated to Undefined.
 // We cannot do this above since Undefined cannot be represented in DispatchKeySet.
-if (dispatch_key == DispatchKey::CompositeImplicitAutograd || dispatch_key == DispatchKey::DefaultBackend) {
+if (dispatch_key == DispatchKey::CompositeImplicitAutograd || dispatch_key == DispatchKey::CompositeExplicitAutograd) {
 updateDispatchTableEntry_(dispatcher, DispatchKey::Undefined);
 }
 // Note [Refresh Runtime Autograd entries in dispatchTable_]
@@ -318,7 +318,7 @@ void OperatorEntry::updateDispatchTableFull_(const c10::Dispatcher& dispatcher)
 // no dispatch keys are available we just slide into the undefined handler which would then raise
 // the error message.
 // In the old world of catchAll, the only way to "register" a kernel to Undefined is by registering it to
-// catchAll. After catchAllKernel_ is removed, Undefined now can get a kernel from either DefaultBackend
+// catchAll. After catchAllKernel_ is removed, Undefined now can get a kernel from either CompositeExplicitAutograd
 // or CompositeImplicitAutograd alias key so that we don't break the support. Ideally isIncludedInAlias(Undefined, CompositeImplicitAutograd)
 // should return true, it returns false because Undefined cannot be represented in a DispatchKeySet.
 for (uint8_t iter = 0; iter != static_cast<uint8_t>(DispatchKey::NumDispatchKeys); ++iter) {
@@ -1502,10 +1502,10 @@ TEST(NewOperatorRegistrationTest, BackendOverridesCompositeImplicitAutogradKerne
 }
 }

-TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendKernel) {
+TEST(NewOperatorRegistrationTest, dispatchWithCompositeExplicitAutogradKernel) {
 bool called = false;
 auto m = MAKE_TORCH_LIBRARY(test);
-m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { called = true; return x; }));
+m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { called = true; return x; }));

 auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
 ASSERT_TRUE(op.has_value());
@@ -1550,11 +1550,11 @@ TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendKernel) {
 }
 }

-TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendAndCompositeImplicitAutogradKernel) {
+TEST(NewOperatorRegistrationTest, dispatchWithCompositeExplicitAutogradAndCompositeImplicitAutogradKernel) {
 bool backend_called = false;
 bool math_called = false;
 auto m = MAKE_TORCH_LIBRARY(test);
-m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { backend_called = true; return x; }));
+m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { backend_called = true; return x; }));
 m.impl("fn", c10::DispatchKey::CompositeImplicitAutograd, [&](const Tensor& x) { math_called = true; return x; });

 auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
@@ -1606,11 +1606,11 @@ TEST(NewOperatorRegistrationTest, dispatchWithDefaultBackendAndCompositeImplicit
 }
 }

-TEST(NewOperatorRegistrationTest, BackendOverridesDefaultBackendKernel) {
+TEST(NewOperatorRegistrationTest, BackendOverridesCompositeExplicitAutogradKernel) {
 bool default_called = false;
 bool backend_called = false;
 auto m = MAKE_TORCH_LIBRARY(test);
-m.def("fn", torch::dispatch(c10::DispatchKey::DefaultBackend, [&](const Tensor& x) { default_called = true; return x; }));
+m.def("fn", torch::dispatch(c10::DispatchKey::CompositeExplicitAutograd, [&](const Tensor& x) { default_called = true; return x; }));
 m.impl("fn", c10::DispatchKey::CPU, [&](const Tensor& x) { backend_called = true; return x; });

 auto op = Dispatcher::singleton().findSchema({"test::fn", ""});
@@ -485,7 +485,7 @@ Here're steps to follow to decide the right dispatch keyword:
 - Yes, but you still want to provide a numerically stable gradient formula instead of using autograd, write
 ```
 dispatch:
-  DefaultBackend: kernel
+  CompositeExplicitAutograd: kernel
 ```

 You're done. This op will be called in inference for all backends.
@@ -505,7 +505,7 @@ Here're steps to follow to decide the right dispatch keyword:
 For `sign` and `sign_`, write
 ```
 dispatch:
-  DefaultBackend: kernel
+  CompositeExplicitAutograd: kernel
 ```

 You're done. This op will be called in inference for all backends.
@@ -529,8 +529,8 @@ It shows for a certain operator, what the computed dispatch table looks like aft

 Note that in native_functions.yaml you can mix using backend keywords and alias keywords above for one op:
 - direct registration to backend always has higher precendence than alias
-- DO NOT provide multiple alias keywords to the same op: alias keywords have precedence `DefaultBackend > CompositeImplicitAutograd`,
-e.g. adding both `CompositeImplicitAutograd` and `DefaultBackend` kernels for one op will completely ignore `CompositeImplicitAutograd` kernel for
+- DO NOT provide multiple alias keywords to the same op: alias keywords have precedence `CompositeExplicitAutograd > CompositeImplicitAutograd`,
+e.g. adding both `CompositeImplicitAutograd` and `CompositeExplicitAutograd` kernels for one op will completely ignore `CompositeImplicitAutograd` kernel for
 both inference and training. Thus this will trigger an error when native_functions.yaml is parsed.
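A quick way to see the rule above in action, sketched with the PythonDispatcher touched later in this same PR (assuming a build that includes the rename): registering both alias keys for one op is rejected with the error message updated in torch/_python_dispatcher.py.

from torch._python_dispatcher import PythonDispatcher

dispatcher = PythonDispatcher()
try:
    # Mixing the two alias keys mirrors writing both keywords in native_functions.yaml.
    dispatcher.register(["CompositeExplicitAutograd", "CompositeImplicitAutograd"])
except RuntimeError as err:
    # Expected: "Registration to both CompositeImplicitAutograd and
    # CompositeExplicitAutograd is not allowed."
    print(err)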
@@ -91,7 +91,7 @@ Tensor upsample_nearest3d_cpu(
 return at::upsample_nearest3d(input, osize, scale_d, scale_h, scale_w);
 }

-// when structured kernels can handle QuantizedCPU, update these overloads to be DefaultBackend
+// when structured kernels can handle QuantizedCPU, update these overloads to be CompositeExplicitAutograd
 Tensor upsample_nearest3d_backward_cpu(
 const Tensor& grad_output,
 c10::optional<IntArrayRef> output_size,
@@ -289,7 +289,7 @@ Tensor upsample_nearest3d_cuda(
 return at::upsample_nearest3d(input, osize, scale_d, scale_h, scale_w);
 }

-// when structured kernels can handle QuantizedCPU, update these overloads to be DefaultBackend
+// when structured kernels can handle QuantizedCPU, update these overloads to be CompositeExplicitAutograd
 Tensor upsample_nearest3d_backward_cuda(
 const Tensor& grad_output,
 c10::optional<IntArrayRef> output_size,
(One file's diff is suppressed here because it is too large.)
@@ -112,8 +112,8 @@ const char* toString(DispatchKey t) {
 case DispatchKey::CompositeImplicitAutograd:
 return "CompositeImplicitAutograd";

-case DispatchKey::DefaultBackend:
-return "DefaultBackend";
+case DispatchKey::CompositeExplicitAutograd:
+return "CompositeExplicitAutograd";

 case DispatchKey::TESTING_ONLY_GenericWrapper:
 return "TESTING_ONLY_GenericWrapper";
@@ -271,18 +271,19 @@ enum class DispatchKey : uint8_t {
 // See Note [Alias Dispatch Key : Autograd]
 Autograd,
 CompositeImplicitAutograd, // registered at build/aten/src/ATen/RegisterCompositeImplicitAutograd.cpp
-DefaultBackend, // registered at
-// build/aten/src/ATen/RegisterDefaultBackend.cpp
+CompositeExplicitAutograd, // registered at
+// build/aten/src/ATen/RegisterCompositeExplicitAutograd.cpp

 // Define an alias key to represent end of alias dispatch keys.
 // If you add new alias keys after Autograd, please also update it here.
-EndOfAliasKeys = DefaultBackend, //
+EndOfAliasKeys = CompositeExplicitAutograd, //

 // ~~~~~~~~~~~~~~~~~~~~~~~~~ BC ALIASES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //
 // The aliases exist for backwards compatibility reasons, they shouldn't
 // be used
 CPUTensorId = CPU,
 CUDATensorId = CUDA,
+DefaultBackend = CompositeExplicitAutograd,
 PrivateUse1_PreAutograd = AutogradPrivateUse1,
 PrivateUse2_PreAutograd = AutogradPrivateUse2,
 PrivateUse3_PreAutograd = AutogradPrivateUse3,
@@ -3,7 +3,7 @@
 namespace c10 {

 // backend_dispatch_keyset should include all runtime backend keys.
-// Alias key DispatchKey::DefaultBackend maps to backend_dispatch_keyset
+// Alias key DispatchKey::CompositeExplicitAutograd maps to backend_dispatch_keyset
 // NestedTensor has been explicitly removed due to incompatibility with some
 // kernels, such as structured kernels, that use the DefaultBackend key.
 constexpr DispatchKeySet backend_dispatch_keyset = autogradother_backends |
@@ -33,7 +33,7 @@ DispatchKeySet getRuntimeDispatchKeySet(DispatchKey t) {
 return autograd_dispatch_keyset;
 case DispatchKey::CompositeImplicitAutograd:
 return math_dispatch_keyset;
-case DispatchKey::DefaultBackend:
+case DispatchKey::CompositeExplicitAutograd:
 return backend_dispatch_keyset;
 default:
 return DispatchKeySet(t);
@@ -562,8 +562,8 @@ QuantizedCPU: fn_quantizedcpu [kernel]
 lambda m: m.def_("foo(Tensor x) -> Tensor"),
 # m.impl("foo", torch::kCPU, [](const Tensor & x) { return x })
 lambda m: m.impl_t_t("foo", "CPU", debug="fn_cpu"),
-# m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+# m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
 ])
 state, table = result.state, result.table
 self.assertExpectedInline(state, '''\
@@ -572,7 +572,7 @@ schema: test::foo(Tensor x) -> (Tensor)
 debug: registered at /dev/null:0
 alias analysis kind: FROM_SCHEMA
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')

 # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -597,8 +597,8 @@ AutogradXLA: fallthrough registered in pytorch framework [backend fallback]
 lambda m: m.impl_t_t("foo", "CPU", debug="fn_cpu"),
 # m.impl("foo", torch::kAutograd, [](const Tensor & x) { return x })
 lambda m: m.impl_t_t("foo", "Autograd", debug="fn_autograd"),
-# m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+# m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
 ])
 state, table = result.state, result.table
 self.assertExpectedInline(state, '''\
@@ -608,7 +608,7 @@ debug: registered at /dev/null:0
 alias analysis kind: FROM_SCHEMA
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 Autograd[alias]: fn_autograd :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')

 # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -636,8 +636,8 @@ QuantizedCPU: fn_defaultbackend [default backend kernel]
 lambda m: m.impl_t_t("foo", "Autograd", debug="fn_autograd"),
 # m.impl("foo", torch::kCompositeImplicitAutograd, [](const Tensor & x) { return x })
 lambda m: m.impl_t_t("foo", "CompositeImplicitAutograd", debug="fn_math"),
-# m.impl("foo", torch::kDefaultBackend, [](const Tensor & x) { return x })
-lambda m: m.impl_t_t("foo", "DefaultBackend", debug="fn_defaultbackend"),
+# m.impl("foo", torch::kCompositeExplicitAutograd, [](const Tensor & x) { return x })
+lambda m: m.impl_t_t("foo", "CompositeExplicitAutograd", debug="fn_defaultbackend"),
 ])
 state, table = result.state, result.table
 self.assertExpectedInline(state, '''\
@@ -648,7 +648,7 @@ alias analysis kind: FROM_SCHEMA
 CPU: fn_cpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 Autograd[alias]: fn_autograd :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 CompositeImplicitAutograd[alias]: fn_math :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
-DefaultBackend[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')

 # computed dispatch table is too big, so we only check on a few entries we're interested in.
@@ -809,7 +809,7 @@ CompositeImplicitAutograd[alias] fn_CompositeImplicitAutograd

 def test_defaultbackend_autogradcpu(self):
 dispatcher = PythonDispatcher()
-dispatcher.register(["CPU", "XLA", "DefaultBackend", "AutogradCPU"])
+dispatcher.register(["CPU", "XLA", "CompositeExplicitAutograd", "AutogradCPU"])
 self.assertExpectedInline(
 dispatcher.dispatchTable(),
 '''\
@@ -819,7 +819,7 @@ key kernel
 ---------------------------
 CPU fn_CPU [kernel]
 XLA fn_XLA [kernel]
-QuantizedCPU fn_DefaultBackend [default backend kernel]
+QuantizedCPU fn_CompositeExplicitAutograd [default backend kernel]
 AutogradOther fallthrough [backend fallback]
 AutogradCPU fn_AutogradCPU [kernel]
 AutogradXLA fallthrough [backend fallback]
@@ -836,7 +836,7 @@ key kernel
 CPU fn_CPU
 XLA fn_XLA
 AutogradCPU fn_AutogradCPU
-DefaultBackend[alias] fn_DefaultBackend
+CompositeExplicitAutograd[alias] fn_CompositeExplicitAutograd
 '''
 )

@@ -883,8 +883,8 @@ CompositeImplicitAutograd[alias] fn_CompositeImplicitAutograd

 with self.assertRaisesRegex(
 RuntimeError,
-r"Registration to both CompositeImplicitAutograd and DefaultBackend is not allowed"):
-dispatcher.register(["DefaultBackend", "CompositeImplicitAutograd"])
+r"Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed"):
+dispatcher.register(["CompositeExplicitAutograd", "CompositeImplicitAutograd"])

 if __name__ == '__main__':
@@ -325,7 +325,7 @@ def gen_variable_type_shard(
 if name in MANUAL_AUTOGRAD_AND_TRACER or (fn.info and fn.info.has_derivatives):
 msg = (f'There\'s a formula for {name}(or its functional variant) in derivatives.yaml. '
 f'It\'s required to add a dispatch section for it with explicit supported backends e.g CPU/CUDA '
-f'or DefaultBackend in native_functions.yaml. Please see '
+f'or CompositeExplicitAutograd in native_functions.yaml. Please see '
 f'https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native#choosing-the-right-dispatch-keyword '
 f'for instructions to choose the right dispatch keyword.')
 assert f.is_abstract, msg
@@ -67,9 +67,9 @@ class RegisterDispatchKey:
 assert self.dispatch_key not in g.out.dispatch, \
 "Do not explicitly specify Meta dispatch key on structured " \
 "functions, they will be automatically generated for you"
-elif self.dispatch_key == DispatchKey.DefaultBackend:
+elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
 assert self.dispatch_key not in g.out.dispatch, \
-"Do not explicitly specify DefaultBackend dispatch key on structured " \
+"Do not explicitly specify CompositeExplicitAutograd dispatch key on structured " \
 "functions, they will be automatically generated for you"
 elif not is_structured_dispatch_key(self.dispatch_key):
 return list(mapMaybe(self.gen_unstructured, g.functions()))
@@ -233,7 +233,7 @@ void set_output(int64_t output_idx, IntArrayRef sizes, IntArrayRef strides,
 """

 def gen_class_set_output_body(self, k: SchemaKind) -> str:
-if self.dispatch_key in [DispatchKey.CUDA, DispatchKey.DefaultBackend]:
+if self.dispatch_key in [DispatchKey.CUDA, DispatchKey.CompositeExplicitAutograd]:
 maybe_set_guard = """
 auto current_device = guard_.current_device();
 if (C10_UNLIKELY(current_device.has_value())) {
|
||||
elif self.dispatch_key == DispatchKey.CUDA:
|
||||
empty_impl = "at::native::empty_cuda"
|
||||
empty_strided_impl = "at::native::empty_strided_cuda"
|
||||
elif self.dispatch_key == DispatchKey.DefaultBackend:
|
||||
elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
|
||||
empty_impl = "at::empty"
|
||||
empty_strided_impl = "at::empty_strided"
|
||||
else:
|
||||
@@ -337,7 +337,7 @@ if (resized) {{
 guard_field = 'c10::hip::OptionalHIPGuardMasqueradingAsCUDA guard_;'
 else:
 guard_field = 'c10::cuda::OptionalCUDAGuard guard_;'
-elif self.dispatch_key == DispatchKey.DefaultBackend:
+elif self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
 guard_field = 'c10::OptionalDeviceGuard guard_;'
 else:
 guard_field = ''
@@ -362,7 +362,7 @@ struct {class_name} final : public {parent_class} {{
 return None

 # TODO: Now, there is something interesting going on here. In the code below,
-# we generate DefaultBackend implementations of functional and inplace
+# we generate CompositeExplicitAutograd implementations of functional and inplace
 # based on the out implementation. But in fact, out is definable by
 # functional too (just not very efficiently), and this is honestly the
 # MORE likely situation for a backend implementor. How do we pick?
@@ -372,7 +372,7 @@ struct {class_name} final : public {parent_class} {{
 # someone to implement one or the other. We'd have to do a little bit
 # of work to not register one of these "weak" definitions unless there
 # is a strong definition somewhere in the DAG! So it's not implemented yet.
-if self.dispatch_key == DispatchKey.DefaultBackend and f.func.kind() is SchemaKind.out:
+if self.dispatch_key == DispatchKey.CompositeExplicitAutograd and f.func.kind() is SchemaKind.out:
 # Never generate a default implementation for out, that's what you
 # have to define as a backend implementor
 return None
@@ -421,7 +421,7 @@ return {sig.name()}({', '.join(e.expr for e in translate(cpp_sig.arguments(), si
 if self.dispatch_key is DispatchKey.Meta:
 class_name = f"structured_{meta.name(self.g)}_meta_{k.name}"
 parent_class = f"at::meta::{meta.name(self.g)}"
-elif self.dispatch_key is DispatchKey.DefaultBackend:
+elif self.dispatch_key is DispatchKey.CompositeExplicitAutograd:
 # TODO: dedup this branch
 class_name = f"structured_{meta.name(self.g)}_default_backend_{k.name}"
 parent_class = f"at::meta::{meta.name(self.g)}"
@@ -464,7 +464,7 @@ return {sig.name()}({', '.join(e.expr for e in translate(cpp_sig.arguments(), si

 # With the expanded context, do the impl call (if not a meta
 # function)
-if self.dispatch_key == DispatchKey.DefaultBackend:
+if self.dispatch_key == DispatchKey.CompositeExplicitAutograd:
 # TODO: https://github.com/pytorch/pytorch/issues/53023
 out_sig_group = CppSignatureGroup.from_native_function(
 self.g.out, method=False, fallback_binding=f.manual_cpp_binding)
@@ -124,7 +124,7 @@ def static_dispatch_extra_headers(backend: Optional[DispatchKey]) -> str:
 return ''
 return f"""
 #include <ATen/{backend}Functions.h>
-#include <ATen/DefaultBackendFunctions.h>
+#include <ATen/CompositeExplicitAutogradFunctions.h>
 #include <ATen/CompositeImplicitAutogradFunctions.h>
 """

@@ -147,7 +147,7 @@ def static_dispatch(
 # migrate math/default_backend ops to use structured delegate.
 return f'return at::{backend.lower()}::{name}({exprs_str});'

-for dispatch_key in (backend, DispatchKey.DefaultBackend, DispatchKey.CompositeImplicitAutograd):
+for dispatch_key in (backend, DispatchKey.CompositeExplicitAutograd, DispatchKey.CompositeImplicitAutograd):
 if dispatch_key in f.dispatch:
 return f'return at::{dispatch_key.lower()}::{name}({exprs_str});'

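The loop above encodes the lookup order used by static dispatch: the concrete backend first, then the CompositeExplicitAutograd alias, then CompositeImplicitAutograd. A standalone sketch of that order follows; the helper name and the sample kernel name are hypothetical, not part of the codegen.

from typing import Dict, Optional

def resolve_static_kernel(dispatch: Dict[str, str], backend: str) -> Optional[str]:
    # Try the concrete backend, then the two alias keys, in precedence order.
    for key in (backend, "CompositeExplicitAutograd", "CompositeImplicitAutograd"):
        if key in dispatch:
            return dispatch[key]
    return None

# An op with only a CompositeExplicitAutograd kernel resolves to it for CPU:
print(resolve_static_kernel({"CompositeExplicitAutograd": "my_kernel"}, "CPU"))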
@@ -863,7 +863,7 @@ def main() -> None:
 DispatchKey.QuantizedCPU,
 DispatchKey.QuantizedCUDA,
 DispatchKey.CompositeImplicitAutograd,
-DispatchKey.DefaultBackend,
+DispatchKey.CompositeExplicitAutograd,
 # Meta is a magic key: it is automatically generated for structured
 # kernels
 DispatchKey.Meta,
@@ -874,7 +874,7 @@ def main() -> None:
 DispatchKey.CPU,
 DispatchKey.CUDA,
 DispatchKey.CompositeImplicitAutograd,
-DispatchKey.DefaultBackend,
+DispatchKey.CompositeExplicitAutograd,
 }
 if options.backend_whitelist:
 dispatch_keys = [k for k in dispatch_keys if is_generic_dispatch_key(k) or str(k) in options.backend_whitelist]
@@ -103,8 +103,8 @@ class DispatchKey(Enum):
 NumDispatchKeys = auto()
 Autograd = auto()
 CompositeImplicitAutograd = auto()
-DefaultBackend = auto()
-EndOfAliasKeys = DefaultBackend
+CompositeExplicitAutograd = auto()
+EndOfAliasKeys = CompositeExplicitAutograd

 CPUTensorId = CPU
 CUDATensorId = CUDA
@@ -134,7 +134,7 @@ STRUCTURED_DISPATCH_KEYS = {DispatchKey.CUDA, DispatchKey.CPU}
 # Dispatch keys that "support all backends". These codegen slightly differently
 # then backend specific keys.
 def is_generic_dispatch_key(dk: DispatchKey) -> bool:
-return dk in {DispatchKey.DefaultBackend, DispatchKey.CompositeImplicitAutograd}
+return dk in {DispatchKey.CompositeExplicitAutograd, DispatchKey.CompositeImplicitAutograd}

 # CUDA specific dispatch keys
 def is_cuda_dispatch_key(dk: DispatchKey) -> bool:
@@ -347,10 +347,10 @@ class NativeFunction:
 elif not structured and structured_delegate is None:
 dispatch[DispatchKey.CompositeImplicitAutograd] = cpp.name(func)

-assert not (DispatchKey.DefaultBackend in dispatch and DispatchKey.CompositeImplicitAutograd in dispatch), \
-"cannot specify both DefaultBackend and CompositeImplicitAutograd on a single kernel; each " \
+assert not (DispatchKey.CompositeExplicitAutograd in dispatch and DispatchKey.CompositeImplicitAutograd in dispatch), \
+"cannot specify both CompositeExplicitAutograd and CompositeImplicitAutograd on a single kernel; each " \
 "strictly subsumes the other. If you wanted to provide an explicit autograd " \
-"implementation, specify DefaultBackend; otherwise specify CompositeImplicitAutograd only"
+"implementation, specify CompositeExplicitAutograd; otherwise specify CompositeImplicitAutograd only"

 e.pop('__line__')
 assert not e, f"leftover entries: {e}"
@@ -22,16 +22,18 @@ keys for a single example of each use case. These use cases are listed below:
 kernel defined in pytorch core library. Backend owner is responsible for registering both
 inference & autograd kernels in their extensions(e.g. torch-xla) for the operators they support.
 E.g. XLA, XPU, MLC
-- DefaultBackend: alias key mapped to inference kernels of all backends like CPU, CUDA, XLA etc.
+- CompositeExplicitAutograd: alias key mapped to inference kernels of all backends like CPU, CUDA, XLA etc.
 Kernels registered to this key MUST work for inference for all backends.
 - Autograd: alias key mapped to autograd of all backends like AutogradCPU, AutogradXLA, AutogradOther.
 Kernels registered to this key MUST work for autograd for all backends.
-- CompositeImplicitAutograd: alias key CompositeImplicitAutograd = DefaultBackend + Autograd
+- CompositeImplicitAutograd: alias key CompositeImplicitAutograd = CompositeExplicitAutograd + Autograd
 Kernels registered to this key MUST work for both inference + autograd for all backends.

-Note we only allow registrations to alias keys inside pytorch core library. E.g you shouldn't register
-a CompositeImplicitAutograd or DefaultBackend kernel from torch-xla extension, instead you should upstream the kernel into
-pytorch/pytorch repo so that it's available for all backends and continuously tested even without the extension.
+Note we only allow registrations to alias keys inside pytorch core library. E.g
+you shouldn't register a CompositeImplicitAutograd or CompositeExplicitAutograd
+kernel from torch-xla extension, instead you should upstream the kernel into
+pytorch/pytorch repo so that it's available for all backends and continuously
+tested even without the extension.

 Usage:
 dispatcher = PythonDispatcher()
@@ -55,7 +57,7 @@ class PythonDispatcher:
 "XLA", "AutogradXLA",
 ]
 alias_keys = [
-"DefaultBackend",
+"CompositeExplicitAutograd",
 "Autograd",
 "CompositeImplicitAutograd",
 ]
@@ -85,8 +87,8 @@ class PythonDispatcher:
 if len(set(dispatchKeys)) != len(dispatchKeys):
 raise RuntimeError(f"Overriden is not allowed but found duplicates in {dispatchKeys}.")
 # We currently forbid this in codegen instead of C++ dispatcher.
-if 'CompositeImplicitAutograd' in dispatchKeys and 'DefaultBackend' in dispatchKeys:
-raise RuntimeError("Registration to both CompositeImplicitAutograd and DefaultBackend is not allowed.")
+if 'CompositeImplicitAutograd' in dispatchKeys and 'CompositeExplicitAutograd' in dispatchKeys:
+raise RuntimeError("Registration to both CompositeImplicitAutograd and CompositeExplicitAutograd is not allowed.")
 for key in dispatchKeys:
 if key not in self.supported_keys:
 raise RuntimeError(f"{key} is not supported, please select a dispatch key in {self.supported_keys}.")
@@ -373,14 +373,14 @@ Tensor & detach_(Tensor & self) {
 // Ops in the following registration list are registered as
 // (1) CompositeImplicitAutograd kernels
 // (2) Autograd kernels
-// (3) DefaultBackend kernels and additionally Autograd kernels
+// (3) CompositeExplicitAutograd kernels and additionally Autograd kernels
 // The reason for (3) is that ops that also use dispatch (e.g. register CPU/CUDA/QuantizedCPU
 // kernels) will skip picking up CompositeImplicitAutograd kernels for Autograd, so we register them to both
-// DefaultBackend and Autograd instead. See
+// CompositeExplicitAutograd and Autograd instead. See
 // https://github.com/pytorch/pytorch/tree/master/aten/src/ATen/native#choosing-the-right-dispatch-keyword
 // for more details.
 // Invariant:
-// - Ops registered to CompositeImplicitAutograd or DefaultBackend below must match `MANUAL_BACKEND` set in tools/autograd/gen_variable_type.py.
+// - Ops registered to CompositeImplicitAutograd or CompositeExplicitAutograd below must match `MANUAL_BACKEND` set in tools/autograd/gen_variable_type.py.
 // and they have manual_kernel_registration=True in native_functions.yaml.
 // - Ops registered to DispatchKey::Autograd below must be included in `MANUAL_AUTOGRAD` in tools/autograd/gen_variable_type.py

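A rough way to inspect the rationale for (3) from Python, assuming a PyTorch build with this commit: with a direct backend registration present, the PythonDispatcher's computed table no longer routes Autograd keys through a CompositeImplicitAutograd kernel, which is why the ops below pair CompositeExplicitAutograd with Autograd instead.

from torch._python_dispatcher import PythonDispatcher

# Backend kernel plus CompositeImplicitAutograd: inspect the AutogradCPU row,
# which is not served by the CompositeImplicitAutograd kernel.
d1 = PythonDispatcher()
d1.register(["CPU", "CompositeImplicitAutograd"])
print(d1.dispatchTable())

# The pattern used below: CompositeExplicitAutograd for inference plus an
# explicit Autograd registration for training.
d2 = PythonDispatcher()
d2.register(["CPU", "CompositeExplicitAutograd", "Autograd"])
print(d2.dispatchTable())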
@@ -393,9 +393,9 @@ TORCH_LIBRARY_IMPL(aten, Autograd, m) {
 m.impl("_fw_primal", torch::dispatch(DispatchKey::Autograd, TORCH_FN(VariableType::_fw_primal)));
 }

-TORCH_LIBRARY_IMPL(aten, DefaultBackend, m) {
-m.impl("_backward", torch::dispatch(DispatchKey::DefaultBackend, TORCH_FN(VariableType::_backward)));
-m.impl("requires_grad_", torch::dispatch(DispatchKey::DefaultBackend, TORCH_FN(VariableType::requires_grad_)));
+TORCH_LIBRARY_IMPL(aten, CompositeExplicitAutograd, m) {
+m.impl("_backward", torch::dispatch(DispatchKey::CompositeExplicitAutograd, TORCH_FN(VariableType::_backward)));
+m.impl("requires_grad_", torch::dispatch(DispatchKey::CompositeExplicitAutograd, TORCH_FN(VariableType::requires_grad_)));
 }

 TORCH_LIBRARY_IMPL(aten, CompositeImplicitAutograd, m) {
@@ -34,7 +34,7 @@ c10::optional<c10::DispatchKey> parseDispatchKey(const std::string& k) {
 {"QuantizedCPU", c10::DispatchKey::QuantizedCPU},
 {"CompositeImplicitAutograd", c10::DispatchKey::CompositeImplicitAutograd},
 {"Autograd", c10::DispatchKey::Autograd},
-{"DefaultBackend", c10::DispatchKey::DefaultBackend},
+{"CompositeExplicitAutograd", c10::DispatchKey::CompositeExplicitAutograd},
 {"AutogradCPU", c10::DispatchKey::AutogradCPU},
 {"", c10::DispatchKey::Undefined},
 };