free up dispatch key space (in C++) (#72402)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/72402 The original PR had an array-out-of-bounds access in `DispatchKeyExtractor.cpp`, that wasn't caught by ASAN and appeared to only manifest in a subset of android internal tests. After fixing the OOB access (and adding more asserts), I confirmed that the android internal test passes. Reland of D33255193 (20b8653dfa) ghstack-source-id: 148830728 Test Plan: Steps to test: (1) connect to a mobile OD (2) run `one_world android emulator android-29` in a terminal to start the android emulator (3) In a separate terminal, run the test: `buck test //fbandroid/instrumentation_tests/com/facebook/pytorch/bi_xray:instrumentation_test -c test.external_runner=tpx -- --regex 'testBIXRayModel.*PyTorchBIXRayInstrumentationTest' --force-remote-execution --run-disabled` I also ran `buck test fbandroid/mode/dbg //fbandroid/instrumentation_tests/com/facebook/pytorch/bi_xray:instrumentation_test`, which failed before and passed after the PR. Reviewed By: albanD Differential Revision: D34034848 fbshipit-source-id: 9677ee2c0a1afd1183896f7055009445712523c5 (cherry picked from commit 9ab9b12d355540ad0923c6869ed088ff6c21490c)
2025-10-20 21:14:14 +08:00 · 2022-02-14 07:53:38 -08:00
parent 4f8b986e28
commit 6690256021
20 changed files with 1748 additions and 515 deletions
--- a/test/test_dispatch.py
+++ b/test/test_dispatch.py
@ -532,8 +532,8 @@ AutogradXLA: fn_math [math kernel]
            lambda m: m.def_("foo(Tensor x) -> Tensor"),
            # m.impl("foo", torch::kCompositeImplicitAutograd, [](const Tensor & x) { return x })
            lambda m: m.impl_t_t("foo", "CompositeImplicitAutograd", debug="fn_math"),
-            # m.impl("foo", torch::kQuantizedCPU, [](const Tensor & x) { return x })
-            lambda m: m.impl_t_t("foo", "QuantizedCPU", debug="fn_quantizedcpu"),
+            # m.impl("foo", torch::kFPGA, [](const Tensor & x) { return x })
+            lambda m: m.impl_t_t("foo", "FPGA", debug="fn_fpga"),
        ])
        state, table = result.state, result.table
        self.assertExpectedInline(state, '''\
@ -541,12 +541,12 @@ name: test::foo
 schema: test::foo(Tensor x) -> (Tensor)
 debug: registered at /dev/null:0
 alias analysis kind: FROM_SCHEMA
-QuantizedCPU: fn_quantizedcpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
+FPGA: fn_fpga :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 CompositeImplicitAutograd[alias]: fn_math :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
 ''')

        # computed dispatch table is too big, so we only check on a few entries we're interested in.
-        extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('QuantizedCPU',))
+        extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('FPGA',))

        self.assertExpectedInline(extracted_table, '''\
 Undefined: fn_math [math kernel]
@ -557,7 +557,7 @@ AutogradOther: ambiguous_autogradother [ambiguous autogradother]
 AutogradCPU: fn_math [math kernel]
 AutogradCUDA: fn_math [math kernel]
 AutogradXLA: fn_math [math kernel]
-QuantizedCPU: fn_quantizedcpu [kernel]
+FPGA: fn_fpga [kernel]
 ''')

    def test_computed_table_with_cpu_defaultbackend(self):
@ -616,7 +616,7 @@ CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0
 ''')

        # computed dispatch table is too big, so we only check on a few entries we're interested in.
-        extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('QuantizedCPU',))
+        extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('FPGA',))

        self.assertExpectedInline(extracted_table, '''\
 Undefined: fn_defaultbackend [default backend kernel]
@ -627,7 +627,7 @@ AutogradOther: fn_autograd [autograd kernel]
 AutogradCPU: fn_autograd [autograd kernel]
 AutogradCUDA: fn_autograd [autograd kernel]
 AutogradXLA: fn_autograd [autograd kernel]
-QuantizedCPU: fn_defaultbackend [default backend kernel]
+FPGA: fn_defaultbackend [default backend kernel]
 ''')

    def test_computed_table_with_cpu_autograd_math_defaultbackend(self):
@ -808,7 +808,7 @@ key             kernel
 CPU             fn_CPU [kernel]
 XLA             fn_XLA [kernel]
 Lazy            fn_Lazy [kernel]
-QuantizedCPU    fn_CompositeImplicitAutograd [math kernel]
+FPGA            fn_CompositeImplicitAutograd [math kernel]
 AutogradOther   fn_CompositeImplicitAutograd [math kernel]
 AutogradCPU     fallthrough [backend fallback]
 AutogradXLA     fallthrough [backend fallback]
@ -829,7 +829,7 @@ key             kernel
 CPU             fn_CPU [kernel]
 XLA             fn_XLA [kernel]
 Lazy            fn_Lazy [kernel]
-QuantizedCPU    fn_CompositeImplicitAutograd [math kernel]
+FPGA            fn_CompositeImplicitAutograd [math kernel]
 AutogradOther   fn_CompositeImplicitAutograd [math kernel]
 AutogradCPU     fn_AutogradCPU [kernel]
 AutogradXLA     fallthrough [backend fallback]
@ -864,7 +864,7 @@ key             kernel
 CPU             fn_CPU [kernel]
 XLA             fn_XLA [kernel]
 Lazy            fn_Lazy [kernel]
-QuantizedCPU    fn_CompositeExplicitAutograd [default backend kernel]
+FPGA            fn_CompositeExplicitAutograd [default backend kernel]
 AutogradOther   fallthrough [backend fallback]
 AutogradCPU     fn_AutogradCPU [kernel]
 AutogradXLA     fallthrough [backend fallback]
@ -889,7 +889,7 @@ CompositeExplicitAutograd[alias] fn_CompositeExplicitAutograd

    def test_autogradother(self):
        dispatcher = PythonDispatcher()
-        dispatcher.register(["CPU", "QuantizedCPU", "CompositeImplicitAutograd"])
+        dispatcher.register(["CPU", "FPGA", "CompositeImplicitAutograd"])
        self.assertExpectedInline(
            dispatcher.dispatchTable(),
            '''\
@ -900,7 +900,7 @@ key             kernel
 CPU             fn_CPU [kernel]
 XLA             fn_CompositeImplicitAutograd [math kernel]
 Lazy            fn_CompositeImplicitAutograd [math kernel]
-QuantizedCPU    fn_QuantizedCPU [kernel]
+FPGA            fn_FPGA [kernel]
 AutogradOther   ambiguous_autogradother [ambiguous autogradother]
 AutogradCPU     fallthrough [backend fallback]
 AutogradXLA     fn_CompositeImplicitAutograd [math kernel]
@ -915,8 +915,8 @@ AutogradLazy    fn_CompositeImplicitAutograd [math kernel]
 Registered Kernels
 key             kernel
 ---------------------------
+FPGA            fn_FPGA
 CPU             fn_CPU
-QuantizedCPU    fn_QuantizedCPU
 CompositeImplicitAutograd[alias] fn_CompositeImplicitAutograd
 '''
        )