[dynamo] Extend LazyVariableTracker to tuples (#117426)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/117426
Approved by: https://github.com/lezcano, https://github.com/jansel
This commit is contained in:
Animesh Jain
2024-01-17 20:53:23 -08:00
committed by PyTorch MergeBot
parent 26956980c6
commit 6e4e81a9ef
19 changed files with 61 additions and 63 deletions

View File

@ -86,7 +86,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,41
detectron2_fcos_r_50_fpn,pass,35
@ -354,7 +354,7 @@ vgg16,pass,0
vision_maskrcnn,pass,17
vision_maskrcnn,pass,16

1 name accuracy graph_breaks
86 timm_vovnet pass 0
87 torch_multimodal_clip pass 0
88 tts_angular pass 2
89 vgg16 pass 0
90 vision_maskrcnn pass 17 16
91 yolov3 pass 2
92
354
355
356
357
358
359
360

View File

@ -294,7 +294,7 @@ vgg16,pass,7
vision_maskrcnn,pass,35
vision_maskrcnn,pass,34

1 name accuracy graph_breaks
294
295
296
297
298
299
300

View File

@ -54,47 +54,47 @@ densenet121,pass,0
detectron2_fasterrcnn_r_101_c4,pass,52
detectron2_fasterrcnn_r_101_c4,pass,51
detectron2_fasterrcnn_r_101_dc5,pass,52
detectron2_fasterrcnn_r_101_dc5,pass,51
detectron2_fasterrcnn_r_101_fpn,pass,56
detectron2_fasterrcnn_r_101_fpn,pass,55
detectron2_fasterrcnn_r_50_c4,pass,52
detectron2_fasterrcnn_r_50_c4,pass,51
detectron2_fasterrcnn_r_50_dc5,pass,52
detectron2_fasterrcnn_r_50_dc5,pass,51
detectron2_fasterrcnn_r_50_fpn,pass,56
detectron2_fasterrcnn_r_50_fpn,pass,55
detectron2_fcos_r_50_fpn,pass,44
detectron2_fcos_r_50_fpn,pass,38
detectron2_maskrcnn_r_101_c4,fail_accuracy,67
detectron2_maskrcnn_r_101_c4,fail_accuracy,66
detectron2_maskrcnn_r_101_fpn,pass,74
detectron2_maskrcnn_r_101_fpn,pass,73
detectron2_maskrcnn_r_50_c4,pass,67
detectron2_maskrcnn_r_50_c4,pass,66
detectron2_maskrcnn_r_50_fpn,pass,74
detectron2_maskrcnn_r_50_fpn,pass,73
@ -322,7 +322,7 @@ vgg16,pass,0
vision_maskrcnn,pass,29
vision_maskrcnn,pass,28

1 name accuracy graph_breaks
54 phlippe_resnet pass 0
55 pyhpc_equation_of_state pass 0
56 pyhpc_isoneutral_mixing pass 0
57 pyhpc_turbulent_kinetic_energy pass 0
58 pytorch_CycleGAN_and_pix2pix pass 0
59 pytorch_stargan pass 0
60 pytorch_unet pass 0
61 resnet152 pass 0
62 resnet18 pass 0
63 resnet50 pass 0
64 resnet50_quantized_qat eager_fail_to_run 0
65 resnext50_32x4d pass 0
66 shufflenet_v2_x1_0 pass 0
67 soft_actor_critic pass 0
68 speech_transformer pass 10
69 squeezenet1_1 pass 0
70 stable_diffusion_unet pass_due_to_skip 0
71 timm_efficientdet model_fail_to_load 0
72 timm_efficientnet pass 0
73 timm_nfnet pass 0
74 timm_regnet pass 0
75 timm_resnest pass 0
76 timm_vision_transformer pass 0
77 timm_vision_transformer_large pass_due_to_skip 0
78 timm_vovnet pass 0
79 torch_multimodal_clip pass 0
80 tts_angular pass 2
81 vgg16 pass 0
82 vision_maskrcnn pass 29 28
83 yolov3 pass 2
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
322
323
324
325
326
327
328

View File

@ -346,7 +346,7 @@ vgg16,pass,0
vision_maskrcnn,pass,17
vision_maskrcnn,pass,16

1 name accuracy graph_breaks
346
347
348
349
350
351
352

View File

@ -286,7 +286,7 @@ vgg16,pass,7
vision_maskrcnn,pass,35
vision_maskrcnn,pass,34

1 name accuracy graph_breaks
286
287
288
289
290
291
292

View File

@ -274,7 +274,7 @@ vgg16,pass,0
vision_maskrcnn,pass,29
vision_maskrcnn,pass,28

1 name accuracy graph_breaks
274
275
276
277
278
279
280

View File

@ -346,7 +346,7 @@ vgg16,pass,0
vision_maskrcnn,pass,17
vision_maskrcnn,pass,16

1 name accuracy graph_breaks
346
347
348
349
350
351
352

View File

@ -286,7 +286,7 @@ vgg16,pass,7
vision_maskrcnn,pass,35
vision_maskrcnn,pass,34

1 name accuracy graph_breaks
286
287
288
289
290
291
292

View File

@ -86,7 +86,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,41
detectron2_fcos_r_50_fpn,pass,35
@ -354,7 +354,7 @@ vgg16,pass,0
vision_maskrcnn,pass,17
vision_maskrcnn,pass,16

1 name accuracy graph_breaks
86 timm_vovnet pass 0
87 torch_multimodal_clip pass 0
88 tts_angular pass 2
89 vgg16 pass 0
90 vision_maskrcnn pass 17 16
91 yolov3 pass 2
92
354
355
356
357
358
359
360

View File

@ -294,7 +294,7 @@ vgg16,pass,7
vision_maskrcnn,pass,35
vision_maskrcnn,pass,34

1 name accuracy graph_breaks
294
295
296
297
298
299
300

View File

@ -86,7 +86,7 @@ detectron2_fasterrcnn_r_50_fpn,eager_fail_to_run,0
detectron2_fcos_r_50_fpn,pass,42
detectron2_fcos_r_50_fpn,pass,36
@ -354,7 +354,7 @@ vgg16,pass,0
vision_maskrcnn,pass,17
vision_maskrcnn,pass,16

1 name accuracy graph_breaks
86 timm_vovnet pass 0
87 torch_multimodal_clip pass 0
88 tts_angular pass 2
89 vgg16 pass 0
90 vision_maskrcnn pass 17 16
91 yolov3 pass 2
92
354
355
356
357
358
359
360

View File

@ -294,7 +294,7 @@ vgg16,pass,7
vision_maskrcnn,pass,35
vision_maskrcnn,pass,34

1 name accuracy graph_breaks
294
295
296
297
298
299
300

View File

@ -785,6 +785,21 @@ class FunctionTests(torch._dynamo.test_case.TestCase):
self.assertTrue(same(ref[1]["e"], res[1]["e"]))
self.assertTrue(same(ref[1][param], res[1][param]))
def test_dict_tuple_lazy_guard(self):
@torch.compile(backend="eager")
def fn(x, y):
return torch.sin(x) * y[1]
fn(torch.randn(3), {1: 1, 2: 2})
# Changing the value of the other key should not cause recompilation
with unittest.mock.patch("torch._dynamo.config.error_on_recompile", True):
fn(torch.randn(3), {1: 1, 2: 3})
fn(torch.randn(3), (1, 2, 3))
# Changing the value of index 0, 2 (not 1) should not cause recompilation
with unittest.mock.patch("torch._dynamo.config.error_on_recompile", True):
fn(torch.randn(3), (11, 2, 13))
@make_test
def test_call_dict1(x):
d1 = dict()

View File

@ -266,9 +266,7 @@ tensor 'L['x']' size mismatch at index 0. expected 8, actual 12""".split(
opt_f([7, 8])
for line in """\
len(L['x']) == 3
L['x'][0] == 4
L['x'][1] == 5""".split(
len(L['x']) == 3""".split(
"\n"
):
self.assertIn(line, filter_reasons())
@ -278,9 +276,7 @@ L['x'][1] == 5""".split(
for line in """\
len(L['x']) == 2
L['x'][0] == 7
len(L['x']) == 3
L['x'][0] == 4""".split(
len(L['x']) == 3""".split(
"\n"
):
self.assertIn(line, filter_reasons())

View File

@ -419,7 +419,6 @@ class TestGradTransform(TestCase):
expected = -y * x.sin()
self.assertEqual(result, expected)
@xfailIfTorchDynamo
def test_grad_of_vjp_of_grad_composition(self, device):
x = torch.randn([], device=device)
y = torch.randn([], device=device)

View File

@ -4934,6 +4934,7 @@ class TestLinalg(TestCase):
@precisionOverride({torch.float32: 1e-2, torch.complex64: 1e-2})
@skipCUDAIfNoMagmaAndNoCusolver
@skipIfTorchDynamo("Runtime error with torch._C._linalg.linalg_lu_factor")
@skipCPUIfNoLapack
@dtypes(*floating_and_complex_types())
def test_linalg_lu_family(self, device, dtype):

View File

@ -801,9 +801,10 @@ class VariableBuilder:
unimplemented("list elements are pointing to the list itself")
output = [
VariableBuilder(self.tx, GetItemSource(self.get_source(), i))(item)
LazyVariableTracker.create(item, source=GetItemSource(self.get_source(), i))
for i, item in enumerate(value)
]
result = BaseListVariable.cls_for_instance(value)(
output, mutable_local=MutableLocal()
)

View File

@ -493,6 +493,13 @@ class BuiltinVariable(VariableTracker):
k: v.as_python_constant() for k, v in kwargs.items()
}
def has_constant_handler(self, args, kwargs):
constant_args = check_constant_args(args, kwargs)
unspec_python_args = self.unspec_python_args(*args, **kwargs)
return self.can_constant_fold_through() and (
constant_args or unspec_python_args
)
def call_function(
self, tx, args: "List[VariableTracker]", kwargs: "Dict[str, VariableTracker]"
) -> "VariableTracker":
@ -501,14 +508,9 @@ class BuiltinVariable(VariableTracker):
args = [v.realize() for v in args]
kwargs = {k: v.realize() for k, v in kwargs.items()}
constant_args = check_constant_args(args, kwargs)
tensor_args = self.tensor_args(*args, **kwargs)
unspec_python_args = self.unspec_python_args(*args, **kwargs)
has_constant_handler = self.can_constant_fold_through() and (
constant_args or unspec_python_args
)
assert isinstance(args, (list, tuple))
assert isinstance(kwargs, dict)
tensor_args = self.tensor_args(*args, **kwargs)
# args[0] is list and args[1] is unspec
if self.fn is operator.getitem and not isinstance(
@ -646,6 +648,7 @@ class BuiltinVariable(VariableTracker):
try:
inspect.signature(handler).bind(tx, *args, **kwargs)
except TypeError as exc:
has_constant_handler = self.has_constant_handler(args, kwargs)
if not has_constant_handler:
log.warning(
"incorrect arg count %s %s and no constant handler",
@ -660,11 +663,17 @@ class BuiltinVariable(VariableTracker):
if result is not None:
return result
except Unsupported as exc:
has_constant_handler = self.has_constant_handler(args, kwargs)
if not has_constant_handler:
raise
# Actually, we will handle this just fine
exc.remove_from_stats()
# NB: call to has_constant_handler is deliberately delayed post generic
# handler because has_constant_handler calls as_python_constant
# internally which realizes LazyVariableTracker for ConstantVariables,
# unnecessarily putting guards on objects which might not actually be used.
has_constant_handler = self.has_constant_handler(args, kwargs)
if has_constant_handler:
# constant fold
return variables.ConstantVariable.create(

View File

@ -126,8 +126,6 @@ dynamo_expected_failures = {
"TestLinalgCPU.test_inverse_cpu_complex128",
"TestLinalgCPU.test_norm_dtype_cpu_complex128",
"TestLinalgCPU.test_householder_product_cpu_float64",
"TestLinalgCPU.test_linalg_lu_family_cpu_float32",
"TestLinalgCPU.test_linalg_lu_family_cpu_float64",
"TestLinalgCPU.test_addr_integral_cpu_int64",
"TestLinalgCPU.test_norm_vector_cpu_float32",
"TestLinalgCPU.test_solve_cpu_complex128",
@ -152,7 +150,6 @@ dynamo_expected_failures = {
"TestLinalgCPU.test_addmm_sizes_cpu_float32",
"TestLinalgCPU.test_norm_bfloat16_and_half_cpu_float16",
"TestLinalgCPU.test_householder_product_cpu_complex64",
"TestLinalgCPU.test_linalg_lu_family_cpu_complex128",
"TestLinalgCPU.test_inverse_cpu_float64",
"TestLinalgCPU.test_slogdet_errors_and_warnings_cpu_complex64",
"TestLinalgCPU.test_pinv_cpu_complex64",
@ -161,7 +158,6 @@ dynamo_expected_failures = {
"TestLinalgCPU.test_einsum_sublist_format_cpu_complex128",
"TestLinalgCPU.test_geqrf_cpu_complex64",
"TestLinalgCPU.test_slogdet_errors_and_warnings_cpu_float64",
"TestLinalgCPU.test_linalg_lu_family_cpu_complex64",
"TestLinalgCPU.test_geqrf_cpu_float64",
"TestLinalgCPU.test_householder_product_cpu_complex128",
"TestLinalgCPU.test_geqrf_cpu_float32",
@ -821,10 +817,8 @@ dynamo_expected_failures = {
"TestIndexing.test_index_no_floats", # torch_np/numpy_tests/core/test_indexing
"TestBooleanIndexing.test_boolean_indexing_weirdness", # torch_np/numpy_tests/core/test_indexing
"TestBooleanIndexing.test_bool_as_int_argument_errors", # torch_np/numpy_tests/core/test_indexing
"TestBroadcastedAssignments.test_simple_broadcasting_errors", # torch_np/numpy_tests/core/test_indexing
"TestFloatNonIntegerArgument.test_non_integer_argument_errors", # torch_np/numpy_tests/core/test_indexing
"TestIndexing.test_slicing_no_floats", # torch_np/numpy_tests/core/test_indexing
"TestBroadcastedAssignments.test_prepend_not_one", # torch_np/numpy_tests/core/test_indexing
"TestFloatNonIntegerArgument.test_reduce_axis_float_index", # torch_np/numpy_tests/core/test_indexing
"TestEinsum.test_different_paths_dtype_e", # torch_np/numpy_tests/core/test_einsum
"TestEinsum.test_different_paths_dtype_B", # torch_np/numpy_tests/core/test_einsum
@ -2073,7 +2067,6 @@ dynamo_expected_failures = {
"TestMkldnnCPU.test_tanh_cpu", # test_mkldnn
"TestMkldnnCPU.test_conv2d_cpu", # test_mkldnn
"TestMkldnnCPU.test_batch_norm_3d_cpu", # test_mkldnn
"TestFunctionSchema.test_serialize_and_deserialize", # test_function_schema
"FakeTensorOperatorInvariants.test_like_ops", # test_fake_tensor
"FakeTensorConverterTest.test_memoized_conversion_from_meta", # test_fake_tensor
"FakeTensorOperatorInvariants.test_non_kwarg_only_device", # test_fake_tensor
@ -2794,7 +2787,6 @@ dynamo_expected_failures = {
"TestVmapOperatorsLegacy.test_contiguous", # test_legacy_vmap
"TestVmapAPILegacy.test_accepts_nested_inputs", # test_legacy_vmap
"TestVmapAPILegacy.test_nested_out_dims", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_add_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_inplace_manyview_cpu", # test_legacy_vmap
"TestVmapAPILegacy.test_functools_partial", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_unrelated_output_cpu", # test_legacy_vmap
@ -2803,21 +2795,16 @@ dynamo_expected_failures = {
"TestVmapAPILegacy.test_single_input", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_chunk", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_mul_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_reshape_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_unrelated_output_multiple_grad_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_stack", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_select", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_binary_pointwise_ops", # test_legacy_vmap
"TestVmapAPILegacy.test_non_tensor_output_raises", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_max_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_binary_cross_entropy_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_diagonal", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_select_cpu", # test_legacy_vmap
"TestVmapAPILegacy.test_nonzero_out_dims", # test_legacy_vmap
"TestVmapAPILegacy.test_unsupported_op_err_msg", # test_legacy_vmap
"TestVmapAPILegacy.test_batched_gradient_basic", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_slice", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_min_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_expand_as", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_unfold", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_sigmoid_cpu", # test_legacy_vmap
@ -2827,16 +2814,11 @@ dynamo_expected_failures = {
"TestVmapOperatorsLegacy.test_new_empty_strided", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_is_floating_point", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_split", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_stack_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_fill_and_zero_inplace", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_is_complex", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_expand_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_as_strided", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_slice_cpu", # test_legacy_vmap
"TestVmapAPILegacy.test_nested_with_different_map_dim", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_new_zeros", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_trace_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_permute_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_view_as", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_logsumexp_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_log1p_cpu", # test_legacy_vmap
@ -2850,17 +2832,13 @@ dynamo_expected_failures = {
"TestVmapBatchedGradientLegacyCPU.test_inplace_on_view_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_new_empty", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_lgamma_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_threshold_cpu", # test_legacy_vmap
"TestVmapAPILegacy.test_multiple_out_dims", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_result_type", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_sum_dim", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_to", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_diagonal_cpu", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_sub_cpu", # test_legacy_vmap
"TestVmapAPILegacy.test_backward_unsupported_interaction", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_comparison_ops", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_is_contiguous", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_cat", # test_legacy_vmap
"TestVmapAPILegacy.test_multiple_outputs", # test_legacy_vmap
"TestVmapAPILegacy.test_inplace_fallback_unary", # test_legacy_vmap
"TestVmapAPILegacy.test_out_dim_out_of_bounds_err_msg", # test_legacy_vmap
@ -2875,7 +2853,6 @@ dynamo_expected_failures = {
"TestVmapOperatorsLegacy.test_no_random_op_support", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_unbind", # test_legacy_vmap
"TestVmapAPILegacy.test_non_default_in_dims_out_dims", # test_legacy_vmap
"TestVmapBatchedGradientLegacyCPU.test_median_cpu", # test_legacy_vmap
"TestVmapOperatorsLegacy.test_T_numpy", # test_legacy_vmap
"TestNamedTensor.test_addmv", # test_namedtensor
"TestNamedTensor.test_cummax_cummin", # test_namedtensor