Files
pytorch/test/inductor/test_aot_inductor_arrayref.py
angelayi 3c8c509a9c [export] Fix custom ops in subgraphs (#160004)
Fixes https://github.com/pytorch/pytorch/issues/159995

Currently there are two problems with extern kernels in subgraphs:
1. They don't get serialized to the extern kernel json file because we only look at the toplevel graph.
2. Since the scope of each extern_kernel list is within its own subgraph, the indices referencing the operators are messed up because each subgraph starts counting from 0.

So, this PR moves the extern_kernels list to a global view (under virtualized) so that we can count the extern kernels across subgraphs and the toplevel graph.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/160004
Approved by: https://github.com/ydwu4
2025-08-18 15:42:19 +00:00

246 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Owner(s): ["module: inductor"]
import sys
import unittest
from torch._inductor.test_case import TestCase
from torch.testing._internal.common_utils import IS_CI, IS_FBCODE, IS_WINDOWS
# On Windows CI, bail out before the heavier test imports below are attempted.
# A note is written to stderr, then:
#   * when the file is executed as a script, the process exits with status 0;
#   * otherwise unittest.SkipTest is raised to whoever imported the module.
if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")
# Import the shared test template and helpers. The outer try turns an import
# failure (or a SkipTest raised while importing the sibling modules) into a
# clean exit when this file is run as a script; otherwise it is re-raised.
try:
    # Try the package-relative import first and fall back to a plain
    # top-level import if that raises ImportError.
    try:
        from .test_aot_inductor import (
            AOTInductorTestsTemplate,
            check_model,
            check_model_with_multiple_inputs,
            code_check_count,
        )
        from .test_torchinductor import copy_tests, TestFailure
    except ImportError:
        from test_aot_inductor import (  # @manual
            AOTInductorTestsTemplate,
            check_model,
            check_model_with_multiple_inputs,
            code_check_count,
        )
        from test_torchinductor import (  # @manual=fbcode//caffe2/test/inductor:test_inductor-library
            copy_tests,
            TestFailure,
        )
except (unittest.SkipTest, ImportError):
    # Script invocation: exit with status 0 instead of showing a traceback.
    if __name__ == "__main__":
        sys.exit(0)
    # Imported by something else: propagate the original exception unchanged.
    raise
def fail_stack_allocation(is_skip=False):
    """Build a ``TestFailure`` that targets both stack-allocation suffixes.

    The returned object names the ``cpu_with_stack_allocation`` suffix as well
    as the ``cpu_with_stack_allocation_and_minimal_arrayref_interface`` suffix.

    Args:
        is_skip: Forwarded unchanged to ``TestFailure``. Per the note on
            ``CPU_TEST_FAILURES``, entries are xfail by default and
            ``is_skip=True`` requests a skip instead.

    Returns:
        The ``TestFailure`` instance constructed from the two suffixes.
    """
    affected_suffixes = (
        "cpu_with_stack_allocation",
        "cpu_with_stack_allocation_and_minimal_arrayref_interface",
    )
    return TestFailure(affected_suffixes, is_skip=is_skip)
def fail_minimal_arrayref_interface(is_skip=False):
    """Build a ``TestFailure`` for the minimal-arrayref-interface suffix only.

    Args:
        is_skip: Forwarded unchanged to ``TestFailure``. Per the note on
            ``CPU_TEST_FAILURES``, entries are xfail by default and
            ``is_skip=True`` requests a skip instead.

    Returns:
        The ``TestFailure`` instance naming the single suffix
        ``cpu_with_stack_allocation_and_minimal_arrayref_interface``.
    """
    # One-element tuple: only the minimal arrayref variant is affected.
    affected_suffixes = ("cpu_with_stack_allocation_and_minimal_arrayref_interface",)
    return TestFailure(affected_suffixes, is_skip=is_skip)
# Expected failures for the CPU stack-allocation configurations in this file.
# Each key is a test name; each value is a TestFailure built by
# fail_stack_allocation() (both suffixes) or fail_minimal_arrayref_interface()
# (minimal arrayref suffix only). This dict is passed to copy_tests() below.
# test_failures, xfail by default, set is_skip=True to skip
CPU_TEST_FAILURES = {
    # TODO: error: complex64 was not declared in this scope
    "test_add_complex": fail_minimal_arrayref_interface(is_skip=True),
    "test_conv_freezing": fail_minimal_arrayref_interface(is_skip=True),
    "test_deconv_freezing": fail_minimal_arrayref_interface(is_skip=True),
    "test_cond_nested": fail_minimal_arrayref_interface(),
    "test_cond_simple": fail_minimal_arrayref_interface(),
    "test_cond_symint_input": fail_minimal_arrayref_interface(),
    "test_cond_use_buffers_from_outer_scope": fail_minimal_arrayref_interface(),
    "test_cond_with_multiple_outputs": fail_minimal_arrayref_interface(),
    "test_cond_with_parameters": fail_minimal_arrayref_interface(),
    "test_cond_with_reinterpret_view_inputs_outputs": fail_minimal_arrayref_interface(),
    "test_custom_op_in_subgraph": fail_minimal_arrayref_interface(),
    # NOTE(review): "predicte" looks like a typo, but the key presumably has to
    # match the test name in the template verbatim - confirm before renaming.
    "test_cond_share_predicte": fail_stack_allocation(is_skip=True),
    "test_cond_unbacked_symint_closure_dynamic_True": fail_minimal_arrayref_interface(),
    "test_while_loop_with_unbacked_symint_closure_dynamic_True": fail_minimal_arrayref_interface(),
    "test_while_loop_with_unbacked_symint_closure_dynamic_False": fail_minimal_arrayref_interface(),
    "test_while_loop_with_mixed_device_dynamic_True": fail_stack_allocation(),
    "test_while_loop_with_mixed_device_dynamic_False": fail_stack_allocation(),
    "test_while_loop_with_sym_expr_cond_dynamic_True": fail_minimal_arrayref_interface(),
    "test_while_loop_with_sym_expr_cond_dynamic_False": fail_minimal_arrayref_interface(),
    "test_while_loop_with_conv_dynamic_True": fail_minimal_arrayref_interface(),
    "test_while_loop_with_conv_dynamic_False": fail_minimal_arrayref_interface(),
    "test_while_loop_with_parameters": fail_minimal_arrayref_interface(),
    "test_while_loop_with_pytree_inputs": fail_stack_allocation(),
    # FIXME: failed with Segfault while exiting the Python runtime
    "test_duplicate_constant_folding": fail_stack_allocation(is_skip=True),
    "test_aot_inductor_consts_cpp_build": fail_stack_allocation(is_skip=True),
    "test_stride_with_unbacked_expr": fail_minimal_arrayref_interface(is_skip=True),
    # TODO: use of deleted function RAIIAtenTensorHandle
    "test_dup_unbacked_sym_decl": fail_minimal_arrayref_interface(is_skip=True),
    # TODO: use of deleted function RAIIAtenTensorHandle
    "test_dup_unbacked_sym_decl_with_refinement": fail_minimal_arrayref_interface(
        is_skip=True
    ),
    # https://github.com/pytorch/pytorch/issues/129550
    # https://github.com/pytorch/pytorch/issues/123691
    "test_dynamic_scalar": fail_stack_allocation(is_skip=True),
    # https://github.com/pytorch/pytorch/issues/122980
    "test_fft_c2c": fail_stack_allocation(is_skip=True),
    "test_freezing": fail_minimal_arrayref_interface(is_skip=True),
    "test_linear_freezing": fail_minimal_arrayref_interface(is_skip=True),
    # FIXME: failed with Segfault while exiting the Python runtime
    "test_missing_cubin": fail_stack_allocation(is_skip=True),
    # minimal arrayref interface only works with CPU; test crashes.
    # https://github.com/pytorch/pytorch/issues/122983
    "test_multi_device": fail_minimal_arrayref_interface(is_skip=True),
    # TODO: AssertionError: unsupported Optional type in convert_arg_type: Generator
    "test_normal_functional": fail_stack_allocation(is_skip=True),
    # the test segfaults
    "test_repeat_output": fail_stack_allocation(is_skip=True),
    # segfault
    "test_buffer_mutation_1": fail_stack_allocation(is_skip=True),
    # segfault
    "test_buffer_mutation_2": fail_stack_allocation(is_skip=True),
    # segfault
    "test_bool_input": fail_stack_allocation(is_skip=True),
    # segfault
    "test_int_list_input": fail_stack_allocation(is_skip=True),
    # segfault
    # 'AOTInductorTestABICompatibleCpuWithStackAllocation' object has no attribute 'code_check_count'
    "test_buffer_mutation_3": fail_stack_allocation(is_skip=True),
    "test_zero_size_buffer": fail_stack_allocation(is_skip=True),
    # FIXME: failed with Segfault while exiting the Python runtime
    "test_scatter_fallback": fail_stack_allocation(is_skip=True),
    # Looks like the same issue as https://github.com/pytorch/pytorch/issues/122978
    "test_scatter_reduce_fallback": fail_minimal_arrayref_interface(is_skip=True),
    # Looks like the same issue as https://github.com/pytorch/pytorch/issues/122978
    "test_index_put_fallback": fail_minimal_arrayref_interface(is_skip=True),
    # https://github.com/pytorch/pytorch/issues/122984
    "test_index_put_with_none_index": fail_minimal_arrayref_interface(is_skip=True),
    # FIXME: failed with Segfault while exiting the Python runtime
    "test_constant": fail_stack_allocation(is_skip=True),
    # Looks like the same issue as https://github.com/pytorch/pytorch/issues/122978
    "test_shifted_constraint_ranges": fail_stack_allocation(is_skip=True),
    # https://github.com/pytorch/pytorch/issues/123691
    "test_amp_fallback_random": fail_minimal_arrayref_interface(is_skip=True),
    # https://github.com/pytorch/pytorch/issues/123691
    "test_zero_grid_with_unbacked_symbols": fail_minimal_arrayref_interface(
        is_skip=True
    ),
    # failed on MacOS
    "test_zero_grid_with_backed_symbols": fail_stack_allocation(is_skip=True),
    # https://github.com/pytorch/pytorch/issues/122990
    "test_cond_non_tensor_predicates_dynamic_False": fail_stack_allocation(
        is_skip=True
    ),
    # same issue as https://github.com/pytorch/pytorch/issues/122990
    "test_cond_non_tensor_predicates_dynamic_True": fail_stack_allocation(is_skip=True),
    "test_cond_mismatched_branch_output_dynamic_True": fail_stack_allocation(
        is_skip=True
    ),
    "test_cond_mismatched_branch_output_dynamic_False": fail_stack_allocation(
        is_skip=True
    ),
    # https://github.com/pytorch/pytorch/issues/122991
    "test_runtime_checks_complex": fail_stack_allocation(is_skip=True),
    "test_runtime_checks_fp8": fail_stack_allocation(is_skip=True),
    "test_while_loop_simple": fail_stack_allocation(is_skip=True),
    "test_while_loop_nested": fail_stack_allocation(is_skip=True),
    "test_while_loop_with_outer_code": fail_stack_allocation(is_skip=True),
    # TODO: error: cannot convert ArrayRefTensor<float> to AtenTensorHandle
    "test_while_loop_with_outer_buffers": fail_stack_allocation(is_skip=True),
    # TODO: use of undeclared identifier 'float8_e4m3fn' and 'half'
    "test_fp8": fail_minimal_arrayref_interface(is_skip=True),
    "test_size_from_multi_output": fail_stack_allocation(is_skip=True),
    "test_torchvision_transforms_functional_tensor_resize": fail_minimal_arrayref_interface(),
    # TODO: AttributeError: 'ShapeAsConstantBuffer' object has no attribute 'dtype'
    "test_symint_item": fail_minimal_arrayref_interface(is_skip=True),
    # TODO: AttributeError: 'ShapeAsConstantBuffer' object has no attribute 'dtype'
    "test_symbool_item": fail_minimal_arrayref_interface(is_skip=True),
    # TODO: AttributeError: 'ShapeAsConstantBuffer' object has no attribute 'dtype'
    "test_symfloat_item": fail_minimal_arrayref_interface(is_skip=True),
    # Causes a segfault when the process exits
    "test_view_outputs": fail_stack_allocation(is_skip=True),
    "test_pytree_inputs": fail_stack_allocation(is_skip=True),
    "test_duplicated_params": fail_stack_allocation(is_skip=True),
    "test_output_misaligned": fail_stack_allocation(is_skip=True),
    "test_no_args": fail_stack_allocation(is_skip=True),
    "test_fqn": fail_stack_allocation(is_skip=True),
    "test_assert_tensor_meta": fail_stack_allocation(is_skip=True),
    "test_clamp_decomposition": fail_stack_allocation(is_skip=True),
    "test_aoti_constant_tensor_name_collision": fail_stack_allocation(is_skip=True),
    "test_cond_unbacked_symint_closure_dynamic_False": fail_stack_allocation(
        is_skip=True
    ),
    "test_empty_cat_dtype_promotion": fail_stack_allocation(is_skip=True),
    "test_pad_fallback": fail_stack_allocation(is_skip=True),
    "test_simple_embed_kernel_binary_False_max_autotune_True": fail_stack_allocation(
        is_skip=True
    ),
    "test_simple_embed_kernel_binary_True_max_autotune_True": fail_stack_allocation(
        is_skip=True
    ),
    # When running test_seq with test_issue_140766, the process segfaults
    "test_seq": fail_stack_allocation(is_skip=True),
}
class AOTInductorTestABICompatibleCpuWithStackAllocation(TestCase):
    """CPU test class with stack allocation on and minimal arrayref off.

    The class body only carries configuration and shared helper functions;
    the test methods themselves are attached by the ``copy_tests`` call that
    follows the class definition.
    """

    # Configuration flags that distinguish this variant.
    allow_stack_allocation = True
    use_minimal_arrayref_interface = False

    # Target device for the copied tests.
    device = "cpu"
    device_type = "cpu"

    # Module-level helpers imported from test_aot_inductor, bound as class
    # attributes so they can be reached through ``self``.
    check_model = check_model
    check_model_with_multiple_inputs = check_model_with_multiple_inputs
    code_check_count = code_check_count
# Attach the tests from AOTInductorTestsTemplate to the stack-allocation class,
# using the "cpu_with_stack_allocation" suffix and the CPU_TEST_FAILURES table.
copy_tests(
    AOTInductorTestsTemplate,
    AOTInductorTestABICompatibleCpuWithStackAllocation,
    "cpu_with_stack_allocation",
    CPU_TEST_FAILURES,
)
class AOTInductorTestABICompatibleCpuWithStackAllocationAndMinimalArrayRefInterface(
    TestCase
):
    """CPU test class with stack allocation and minimal arrayref both on.

    The class body only carries configuration and shared helper functions;
    test methods are attached by the ``copy_tests`` call after the class,
    which is guarded by ``IS_FBCODE``.
    """

    # Configuration flags that distinguish this variant.
    allow_stack_allocation = True
    use_minimal_arrayref_interface = True

    # Target device for the copied tests.
    device = "cpu"
    device_type = "cpu"

    # Module-level helpers imported from test_aot_inductor, bound as class
    # attributes so they can be reached through ``self``.
    check_model = check_model
    check_model_with_multiple_inputs = check_model_with_multiple_inputs
    code_check_count = code_check_count
# Only register the minimal-arrayref-interface tests when IS_FBCODE is true.
if IS_FBCODE:
    # The following tests look like they pass in both pytest and unittest (xml
    # and terminal output say pass), but the process will segfault. This only
    # happens in OSS CI and is fine internally, which is why the registration
    # is restricted to the IS_FBCODE case.
    # See https://github.com/pytorch/pytorch/issues/123691
    copy_tests(
        AOTInductorTestsTemplate,
        AOTInductorTestABICompatibleCpuWithStackAllocationAndMinimalArrayRefInterface,
        "cpu_with_stack_allocation_and_minimal_arrayref_interface",
        CPU_TEST_FAILURES,
    )
# Script entry point: run this module's tests through the inductor test runner.
if __name__ == "__main__":
    # Imported here so run_tests is only loaded when executed as a script.
    from torch._inductor.test_case import run_tests

    # NOTE(review): needs="filelock" presumably makes the run conditional on
    # the filelock package being available - confirm against run_tests.
    run_tests(needs="filelock")