free up dispatch key space (in C++) (#72402)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/72402

The original PR had an out-of-bounds array access in `DispatchKeyExtractor.cpp` that wasn't caught by ASAN and appeared to manifest only in a subset of internal Android tests. After fixing the OOB access (and adding more asserts), I confirmed that the internal Android test passes.

Reland of D33255193 (20b8653dfa)
ghstack-source-id: 148830728

Test Plan:
Steps to test:

(1) Connect to a mobile OD

(2) Run `one_world android emulator android-29` in a terminal to start the Android emulator

(3) In a separate terminal, run the test: `buck test //fbandroid/instrumentation_tests/com/facebook/pytorch/bi_xray:instrumentation_test -c test.external_runner=tpx -- --regex 'testBIXRayModel.*PyTorchBIXRayInstrumentationTest' --force-remote-execution --run-disabled`

I also ran `buck test fbandroid/mode/dbg //fbandroid/instrumentation_tests/com/facebook/pytorch/bi_xray:instrumentation_test`, which failed before and passed after the PR.

Reviewed By: albanD

Differential Revision: D34034848

fbshipit-source-id: 9677ee2c0a1afd1183896f7055009445712523c5
(cherry picked from commit 9ab9b12d355540ad0923c6869ed088ff6c21490c)
This commit is contained in:
Brian Hirsh
2022-02-14 07:53:38 -08:00
committed by PyTorch MergeBot
parent 4f8b986e28
commit 6690256021
20 changed files with 1748 additions and 515 deletions

View File

@ -532,8 +532,8 @@ AutogradXLA: fn_math [math kernel]
lambda m: m.def_("foo(Tensor x) -> Tensor"),
# m.impl("foo", torch::kCompositeImplicitAutograd, [](const Tensor & x) { return x })
lambda m: m.impl_t_t("foo", "CompositeImplicitAutograd", debug="fn_math"),
# m.impl("foo", torch::kQuantizedCPU, [](const Tensor & x) { return x })
lambda m: m.impl_t_t("foo", "QuantizedCPU", debug="fn_quantizedcpu"),
# m.impl("foo", torch::kFPGA, [](const Tensor & x) { return x })
lambda m: m.impl_t_t("foo", "FPGA", debug="fn_fpga"),
])
state, table = result.state, result.table
self.assertExpectedInline(state, '''\
@ -541,12 +541,12 @@ name: test::foo
schema: test::foo(Tensor x) -> (Tensor)
debug: registered at /dev/null:0
alias analysis kind: FROM_SCHEMA
QuantizedCPU: fn_quantizedcpu :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
FPGA: fn_fpga :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
CompositeImplicitAutograd[alias]: fn_math :: (Tensor _0) -> (Tensor _0) [ boxed unboxed ]
''')
# computed dispatch table is too big, so we only check on a few entries we're interested in.
extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('QuantizedCPU',))
extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('FPGA',))
self.assertExpectedInline(extracted_table, '''\
Undefined: fn_math [math kernel]
@ -557,7 +557,7 @@ AutogradOther: ambiguous_autogradother [ambiguous autogradother]
AutogradCPU: fn_math [math kernel]
AutogradCUDA: fn_math [math kernel]
AutogradXLA: fn_math [math kernel]
QuantizedCPU: fn_quantizedcpu [kernel]
FPGA: fn_fpga [kernel]
''')
def test_computed_table_with_cpu_defaultbackend(self):
@ -616,7 +616,7 @@ CompositeExplicitAutograd[alias]: fn_defaultbackend :: (Tensor _0) -> (Tensor _0
''')
# computed dispatch table is too big, so we only check on a few entries we're interested in.
extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('QuantizedCPU',))
extracted_table = extract_dispatch_table_with_keys(table, dispatch_keys_to_check + ('FPGA',))
self.assertExpectedInline(extracted_table, '''\
Undefined: fn_defaultbackend [default backend kernel]
@ -627,7 +627,7 @@ AutogradOther: fn_autograd [autograd kernel]
AutogradCPU: fn_autograd [autograd kernel]
AutogradCUDA: fn_autograd [autograd kernel]
AutogradXLA: fn_autograd [autograd kernel]
QuantizedCPU: fn_defaultbackend [default backend kernel]
FPGA: fn_defaultbackend [default backend kernel]
''')
def test_computed_table_with_cpu_autograd_math_defaultbackend(self):
@ -808,7 +808,7 @@ key kernel
CPU fn_CPU [kernel]
XLA fn_XLA [kernel]
Lazy fn_Lazy [kernel]
QuantizedCPU fn_CompositeImplicitAutograd [math kernel]
FPGA fn_CompositeImplicitAutograd [math kernel]
AutogradOther fn_CompositeImplicitAutograd [math kernel]
AutogradCPU fallthrough [backend fallback]
AutogradXLA fallthrough [backend fallback]
@ -829,7 +829,7 @@ key kernel
CPU fn_CPU [kernel]
XLA fn_XLA [kernel]
Lazy fn_Lazy [kernel]
QuantizedCPU fn_CompositeImplicitAutograd [math kernel]
FPGA fn_CompositeImplicitAutograd [math kernel]
AutogradOther fn_CompositeImplicitAutograd [math kernel]
AutogradCPU fn_AutogradCPU [kernel]
AutogradXLA fallthrough [backend fallback]
@ -864,7 +864,7 @@ key kernel
CPU fn_CPU [kernel]
XLA fn_XLA [kernel]
Lazy fn_Lazy [kernel]
QuantizedCPU fn_CompositeExplicitAutograd [default backend kernel]
FPGA fn_CompositeExplicitAutograd [default backend kernel]
AutogradOther fallthrough [backend fallback]
AutogradCPU fn_AutogradCPU [kernel]
AutogradXLA fallthrough [backend fallback]
@ -889,7 +889,7 @@ CompositeExplicitAutograd[alias] fn_CompositeExplicitAutograd
def test_autogradother(self):
dispatcher = PythonDispatcher()
dispatcher.register(["CPU", "QuantizedCPU", "CompositeImplicitAutograd"])
dispatcher.register(["CPU", "FPGA", "CompositeImplicitAutograd"])
self.assertExpectedInline(
dispatcher.dispatchTable(),
'''\
@ -900,7 +900,7 @@ key kernel
CPU fn_CPU [kernel]
XLA fn_CompositeImplicitAutograd [math kernel]
Lazy fn_CompositeImplicitAutograd [math kernel]
QuantizedCPU fn_QuantizedCPU [kernel]
FPGA fn_FPGA [kernel]
AutogradOther ambiguous_autogradother [ambiguous autogradother]
AutogradCPU fallthrough [backend fallback]
AutogradXLA fn_CompositeImplicitAutograd [math kernel]
@ -915,8 +915,8 @@ AutogradLazy fn_CompositeImplicitAutograd [math kernel]
Registered Kernels
key kernel
---------------------------
FPGA fn_FPGA
CPU fn_CPU
QuantizedCPU fn_QuantizedCPU
CompositeImplicitAutograd[alias] fn_CompositeImplicitAutograd
'''
)