Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00.
Enable UFMT on test/jit_hooks, test/lazy and some files (#123807)
Part of: #123062

Ran lintrunner on:

- `test/jit_hooks`
- `test/lazy`
- `test/linear.py`
- `test/load_torchscript_model.py`
- `test/mkl_verbose.py`
- `test/mkldnn_verbose.py`

with command:

```bash
lintrunner -a --take UFMT --all-files
```

Co-authored-by: Edward Z. Yang <ezyang@fb.com>
Pull Request resolved: https://github.com/pytorch/pytorch/pull/123807
Approved by: https://github.com/ezyang
This commit is contained in:
committed by PyTorch MergeBot
parent 4e3022dbe9
commit 706f7d1f22
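The `--all-files` invocation above reformats every file not excluded in `.lintrunner.toml`. As a quicker local check, here is a minimal sketch of re-running only the UFMT linter (which combines usort and Black) on the paths this PR touches; it assumes lintrunner accepts explicit path arguments and expands the two directories — if not, list the individual files instead. The paths are taken from the commit message above.

```bash
# Hypothetical targeted run: apply UFMT only to the files/dirs touched by this PR.
lintrunner -a --take UFMT \
    test/jit_hooks test/lazy \
    test/linear.py test/load_torchscript_model.py \
    test/mkl_verbose.py test/mkldnn_verbose.py
```

Dropping `-a` reports the changes lintrunner would make without applying them.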
.lintrunner.toml
@@ -1162,18 +1162,6 @@ exclude_patterns = [
    'test/functorch/test_vmap.py',
    'test/functorch/test_vmap_registrations.py',
    'test/functorch/xfail_suggester.py',
    'test/lazy/__init__.py',
    'test/lazy/test_bindings.py',
    'test/lazy/test_debug_util.py',
    'test/lazy/test_extract_compiled_graph.py',
    'test/lazy/test_meta_kernel.py',
    'test/lazy/test_reuse_ir.py',
    'test/lazy/test_step_closures.py',
    'test/lazy/test_ts_opinfo.py',
    'test/linear.py',
    'test/load_torchscript_model.py',
    'test/mkl_verbose.py',
    'test/mkldnn_verbose.py',
    'test/nn/test_convolution.py',
    'test/nn/test_dropout.py',
    'test/nn/test_embedding.py',
test/lazy/test_bindings.py
@@ -2,6 +2,7 @@

import torch._lazy.metrics


def test_metrics():
    names = torch._lazy.metrics.counter_names()
    assert len(names) == 0, f"Expected no counter names, but got {names}"
test/lazy/test_debug_util.py
@@ -3,11 +3,11 @@
import os
import re
import tempfile
import torch.nn as nn
import unittest

import torch._lazy
import torch._lazy.ts_backend
import torch.nn as nn
from torch.testing._internal.common_utils import IS_WINDOWS, run_tests, TestCase

torch._lazy.ts_backend.init()
@@ -21,15 +21,16 @@ class DebugUtilTest(TestCase):
        output = model(torch.randn(1, 5).to(device))
        torch._lazy.mark_step()


    def test_get_python_frames(self):
        # We only care about the first "Python Stacktrace" part of the saved
        # graph. However, we cannot save the whole stack for comparison given
        # it depends on a lot of things.
        partial_graph = (r"Python Stacktrace:.*"
                         r"mark_step \(.*/_lazy/__init__.py:[0-9]+\).*"
                         r"_run_linear \(.*lazy/test_debug_util.py:[0-9]+\).*"
                         r"test_get_python_frames \(.*lazy/test_debug_util.py:[0-9]+\)")
        partial_graph = (
            r"Python Stacktrace:.*"
            r"mark_step \(.*/_lazy/__init__.py:[0-9]+\).*"
            r"_run_linear \(.*lazy/test_debug_util.py:[0-9]+\).*"
            r"test_get_python_frames \(.*lazy/test_debug_util.py:[0-9]+\)"
        )

        with tempfile.NamedTemporaryFile(mode="r+", encoding="utf-8") as graph_file:
            os.environ["LTC_SAVE_TENSORS_FILE"] = graph_file.name
test/lazy/test_extract_compiled_graph.py
@@ -3,37 +3,44 @@
import unittest

from torch._lazy.ts_backend import init as init_ts_backend

init_ts_backend()
from torch._lazy import config
from torch._lazy.extract_compiled_graph import extract_compiled_graph
import torch
from torch import nn
import copy
import dis
import inspect
from torch import fx
import re
from contextlib import contextmanager
import copy

import torch
from torch import fx, nn
from torch._lazy import config
from torch._lazy.extract_compiled_graph import extract_compiled_graph


class ModuleConstScale(nn.Module):
    def forward(self, a):
        return a * 2


class ModuleSub(nn.Module):
    def forward(self, a, b):
        return a - b


class ModuleAddcmul(nn.Module):
    """
    addcmul function takes a at::Scalar which results in a special TSData containing a Scalar rather than a Tensor.
    """

    def forward(self, a, b, c):
        return torch.addcmul(a, b, c, value=5)


class ModuleReturnMulti(nn.Module):
    def forward(self, a, b):
        return (b + 1, a - 1)


# The default fx tracer will convert torch.randn to a constant.. We may need
# a custom tracer.
# class ModuleEagerTensor(nn.Module):
@@ -58,21 +65,25 @@ class ModuleReturnMulti(nn.Module):
#     def forward(self):
#         return torch.tensor((2, 3), dtype=torch.float32)


class ModuleReturnDupTensor(nn.Module):
    """
    Handle the corner case that the same tensor appears multiple times in the
    returned tuple. torchbench like drq will hit this corner case when running
    thru torchdynamo..
    """

    def forward(self, a, b):
        c = a + b
        return a - b, c, a + 1, c


class ModuleInplaceUpdate(nn.Module):
    def forward(self, a, b):
        a.sub_(b)
        return b - 1, b + 1


@contextmanager
def force_fallback_ctx_mgr(fallback_op):
    oldconfig = config.get_force_fallback()
@@ -82,6 +93,7 @@ def force_fallback_ctx_mgr(fallback_op):
    finally:
        config.set_force_fallback(oldconfig)


@contextmanager
def nop_ctx_mgr():
    try:
@@ -89,27 +101,33 @@ def nop_ctx_mgr():
    finally:
        pass


def gen_rand_args(mod):
    args = []
    for _ in range(len(inspect.signature(mod.forward).parameters)):
        args.append(torch.randn(2, 3))
    return args


def allclose(expected, actual):
    def unwrap(cont):
        if isinstance(cont, (list, tuple)) and len(cont) == 1:
            return cont[0]
        return cont

    expected = unwrap(expected)
    actual = unwrap(actual)

    if isinstance(expected, torch.Tensor) and isinstance(actual, torch.Tensor):
        return torch.allclose(expected, actual)
    elif isinstance(expected, (tuple, list)) and isinstance(actual, (tuple, list)):
        return len(expected) == len(actual) and all(torch.allclose(a, b) for a, b in zip(expected, actual))
        return len(expected) == len(actual) and all(
            torch.allclose(a, b) for a, b in zip(expected, actual)
        )
    else:
        raise RuntimeError("Unexpected types")


def verify_reusing_compiled_graph(mod, exception_msg_pattern, ncase=10):
    args = gen_rand_args(mod)
    out = mod(*args)
@@ -123,13 +141,17 @@ def verify_reusing_compiled_graph(mod, exception_msg_pattern, ncase=10):
            raise e  # reraise the exception
        exception_message = str(e)
        if not re.search(exception_msg_pattern, exception_message):
            raise RuntimeError(f"Exception message does not match the required pattern: {exception_message}") from e
            raise RuntimeError(
                f"Exception message does not match the required pattern: {exception_message}"
            ) from e
        else:
            # We are done for the test case that expects an exception
            return

    if exception_msg_pattern is not None:
        raise RuntimeError(f"Expect an exception matching pattern {exception_msg_pattern}")
        raise RuntimeError(
            f"Expect an exception matching pattern {exception_msg_pattern}"
        )
    print("return value of optimized_mod", optimized_mod(*args))

    # check correctness
@@ -148,13 +170,16 @@ def verify_reusing_compiled_graph(mod, exception_msg_pattern, ncase=10):
        # make sure arguments match after calling the model forward method to handle inplace
        # updates.
        if not allclose(rand_args, rand_args_copy):
            print(f"Incorrect updated arguments. expected {rand_args}, actual {rand_args_copy}")
            print(
                f"Incorrect updated arguments. expected {rand_args}, actual {rand_args_copy}"
            )
            failed_index.append(i)
            continue

    if len(failed_index) > 0:
        raise RuntimeError(f"Failed {len(failed_index)}/{ncase} cases")


def maketest(module_cls, exception_msg_pattern=None, ctxmgr=None):
    def wrapper(self):
        nonlocal ctxmgr
@@ -165,11 +190,16 @@ def maketest(module_cls, exception_msg_pattern=None, ctxmgr=None):

    return wrapper


class OptimizeTest(unittest.TestCase):
    test_sub = maketest(ModuleSub)
    # Same as test_sub but force aten::sub to fallback
    # We expect an exception caught because of LTC fallabck.
    test_ltc_fallback = maketest(ModuleSub, exception_msg_pattern="fallback.*aten::sub", ctxmgr=force_fallback_ctx_mgr("aten::sub"))
    test_ltc_fallback = maketest(
        ModuleSub,
        exception_msg_pattern="fallback.*aten::sub",
        ctxmgr=force_fallback_ctx_mgr("aten::sub"),
    )
    test_const_scale = maketest(ModuleConstScale)
    test_addcmul = maketest(ModuleAddcmul)
    test_return_multi = maketest(ModuleReturnMulti)
test/lazy/test_meta_kernel.py
@@ -1,16 +1,16 @@
# Owner(s): ["oncall: jit"]

import torch

from torch.testing._internal.common_utils import TestCase
from torch import float32, float16
import torch._lazy
import torch._lazy.ts_backend
from torch import float16, float32

from torch.testing._internal.common_utils import TestCase

torch._lazy.ts_backend.init()

class TestMetaKernel(TestCase):

class TestMetaKernel(TestCase):
    def test_addmm_invalid_dtype(self):
        """Tests that the addmm meta kernel returns the correct output type"""
        input = torch.ones(2, 2, dtype=torch.float16).to("lazy")
@@ -35,5 +35,5 @@ class TestMetaKernel(TestCase):
        self.assertEqual(out_bias.dtype, torch.float16)

    def test_add_invalid_device(self):
        with self.assertRaisesRegex(RuntimeError, '.*not a lazy tensor.*'):
        with self.assertRaisesRegex(RuntimeError, ".*not a lazy tensor.*"):
            _ = torch.tensor([1], device="cpu") + torch.tensor([1], device="lazy")
test/lazy/test_reuse_ir.py
@@ -1,20 +1,23 @@
# Owner(s): ["oncall: jit"]

import os
import unittest

import torch
import torch._lazy
import torch._lazy.config
import torch._lazy.ir_cache
import torch._lazy.ts_backend
import torch._lazy.metrics as metrics
import torch._lazy.ts_backend
from torch.testing._internal.common_utils import IS_WINDOWS, run_tests, TestCase
import os
import unittest

torch._lazy.ts_backend.init()
torch._lazy.config.set_reuse_ir(True)


def get_test_device():
    return 'cuda' if 'LTC_TS_CUDA' in os.environ else 'cpu'
    return "cuda" if "LTC_TS_CUDA" in os.environ else "cpu"


@unittest.skipIf(IS_WINDOWS, "To be fixed")
class TestLazyReuseIr(TestCase):
@@ -24,16 +27,16 @@ class TestLazyReuseIr(TestCase):
        y = torch.randn(2, 3, 4, device=device)
        z = torch.zeros(2, 3, 4, device=device)

        device = 'lazy'
        device = "lazy"
        x_lazy = x.detach().clone().to(device=device)
        y_lazy = y.detach().clone().to(device=device)
        z_lazy = z.detach().clone().to(device=device)

        for i in range(10):
            z += (x + y)
            z += x + y

        for i in range(10):
            z_lazy += (x_lazy + y_lazy)
            z_lazy += x_lazy + y_lazy
            torch._lazy.mark_step()

        torch.testing.assert_close(z.cpu(), z_lazy.cpu())
@@ -47,22 +50,22 @@ class TestLazyReuseIr(TestCase):
        y = torch.randn(2, 3, 4, device=device)
        z = torch.zeros(2, 3, 4, device=device)

        device = 'lazy'
        device = "lazy"
        x_lazy = x.detach().clone().to(device=device)
        y_lazy = y.detach().clone().to(device=device)
        z_lazy = z.detach().clone().to(device=device)

        for i in range(10):
            if i < 5:
                z += (x + y)
                z += x + y
            else:
                z += (x - y)
                z += x - y

        for i in range(10):
            if i < 5:
                z_lazy += (x_lazy + y_lazy)
                z_lazy += x_lazy + y_lazy
            else:
                z_lazy += (x_lazy - y_lazy)
                z_lazy += x_lazy - y_lazy
            torch._lazy.mark_step()

        torch.testing.assert_close(z.cpu(), z_lazy.cpu())
@@ -77,22 +80,22 @@ class TestLazyReuseIr(TestCase):
        y = torch.randn(2, 3, 4, device=device)
        z = torch.zeros(2, 3, 4, device=device)

        device = 'lazy'
        device = "lazy"
        x_lazy = x.detach().clone().to(device=device)
        y_lazy = y.detach().clone().to(device=device)
        z_lazy = z.detach().clone().to(device=device)

        for i in range(10):
            if i < 5:
                z += (x + y)
                z += x + y
            else:
                z += (x - y)
                z += x - y

        for i in range(10):
            if i < 5:
                z_lazy += (x_lazy + y_lazy)
                z_lazy += x_lazy + y_lazy
            else:
                z_lazy += (x_lazy - y_lazy)
                z_lazy += x_lazy - y_lazy
            torch._lazy.mark_step()

        torch.testing.assert_close(z.cpu(), z_lazy.cpu())
@@ -110,16 +113,24 @@ class TestLazyReuseIr(TestCase):
        for i in range(10):
            # BatchNorm2d does extra checks on dimensions which SymInts don't support yet
            # so we call `torch.ops.aten.native_batch_norm` to bypass the checks.
            z, _, _ = torch.ops.aten.native_batch_norm(x, weight, bias, None, None, True, 0.1, 1e-5)
            z_legit, _, _ = torch.ops.aten._native_batch_norm_legit(x, weight, bias, True, 0.1, 1e-5)
            z, _, _ = torch.ops.aten.native_batch_norm(
                x, weight, bias, None, None, True, 0.1, 1e-5
            )
            z_legit, _, _ = torch.ops.aten._native_batch_norm_legit(
                x, weight, bias, True, 0.1, 1e-5
            )

        device = "lazy"
        x_lazy = x.detach().clone().to(device=device)
        weight_lazy = weight.detach().clone().to(device=device)
        bias_lazy = bias.detach().clone().to(device=device)
        for i in range(10):
            z_lazy, _, _ = torch.ops.aten.native_batch_norm(x_lazy, weight_lazy, bias_lazy, None, None, True, 0.1, 1e-5)
            z_legit_lazy, _, _ = torch.ops.aten._native_batch_norm_legit(x_lazy, weight_lazy, bias_lazy, True, 0.1, 1e-5)
            z_lazy, _, _ = torch.ops.aten.native_batch_norm(
                x_lazy, weight_lazy, bias_lazy, None, None, True, 0.1, 1e-5
            )
            z_legit_lazy, _, _ = torch.ops.aten._native_batch_norm_legit(
                x_lazy, weight_lazy, bias_lazy, True, 0.1, 1e-5
            )
            torch._lazy.mark_step()

        torch.testing.assert_close(z.cpu(), z_lazy.cpu())
@@ -129,5 +140,5 @@ class TestLazyReuseIr(TestCase):
        torch._lazy.ir_cache.reset()


if __name__ == '__main__':
if __name__ == "__main__":
    run_tests()
test/lazy/test_ts_opinfo.py
@@ -1,109 +1,125 @@
# Owner(s): ["oncall: jit"]

from typing import Sequence
import torch
import functools
import itertools
import os
import pathlib
from typing import Sequence
from unittest import skip

import torch
import torch._lazy
import torch._lazy.config
import torch._lazy.ir_cache
import torch._lazy.metrics
import torch._lazy.ts_backend
import yaml
from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests,
    ops,
)
from torch.testing._internal.common_methods_invocations import op_db

from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.jit_utils import JitTestCase
from torch.testing._internal.common_methods_invocations import op_db
from torch.testing._internal.common_device_type import ops, instantiate_device_type_tests
import torch._lazy
import torch._lazy.config
import torch._lazy.metrics
import torch._lazy.ir_cache
import torch._lazy.ts_backend
import itertools
import yaml
import os
import pathlib
from unittest import skip

torch._lazy.ts_backend.init()


def get_test_device():
    return 'cuda' if 'LTC_TS_CUDA' in os.environ else 'cpu'
    return "cuda" if "LTC_TS_CUDA" in os.environ else "cpu"


def remove_suffixes(l):
    return [x.split(".")[0] for x in l]


def init_lists():
    path_to_script = pathlib.Path(os.path.abspath(os.path.dirname(__file__)))
    TS_NATIVE_FUNCTIONS_PATH = path_to_script.parent.parent / "aten/src/ATen/native/ts_native_functions.yaml"
    TS_NATIVE_FUNCTIONS_PATH = (
        path_to_script.parent.parent / "aten/src/ATen/native/ts_native_functions.yaml"
    )
    with open(TS_NATIVE_FUNCTIONS_PATH) as f:
        yaml_ts = yaml.load(f, yaml.SafeLoader)
    LAZY_OPS_LIST = set(remove_suffixes(itertools.chain(yaml_ts["full_codegen"], yaml_ts["supported"], yaml_ts["autograd"])))
    LAZY_OPS_LIST = set(
        remove_suffixes(
            itertools.chain(
                yaml_ts["full_codegen"], yaml_ts["supported"], yaml_ts["autograd"]
            )
        )
    )
    HAS_SYMINT_SUFFIX = yaml_ts["symint"]
    FALLBACK_LIST = {"clamp"}
    SKIP_RUNTIME_ERROR_LIST = {
        'index_select',  # Empty output_sizes is not supported
        'clone',  # is clone decomposed?

        "index_select",  # Empty output_sizes is not supported
        "clone",  # is clone decomposed?
        # General ASAN Failure due to related to generating bool values.
        # https://github.com/pytorch/pytorch/issues/74519
        # https://github.com/pytorch/pytorch/issues/63034
        'nonzero',  # ASAN failure (paste: P501906539)
        'all',  # ASAN failure
        'any',  # ASAN failure
        'logdet',  # ASAN failure
        "nonzero",  # ASAN failure (paste: P501906539)
        "all",  # ASAN failure
        "any",  # ASAN failure
        "logdet",  # ASAN failure
    }
    SKIP_INCORRECT_RESULTS_LIST = {
        'squeeze',  # Value out of range
        't',  # Value out of range
        'transpose',  # Value out of range
        'bernoulli',  # incorrect results
        'pow',  # incorrect results
        'addcdiv',  # incorrect results (on CI not locally?)
        "squeeze",  # Value out of range
        "t",  # Value out of range
        "transpose",  # Value out of range
        "bernoulli",  # incorrect results
        "pow",  # incorrect results
        "addcdiv",  # incorrect results (on CI not locally?)
    }
    # The following ops all show up directly in ts_native_functions.yaml,
    # but run functionalized versions of the composite kernels in core.
    # This means that we don't expect the ops to show directly in the LTC metrics.
    FUNCTIONAL_DECOMPOSE_LIST = {
        'diag_embed',
        'block_diag',
        'new_empty_strided',
        'narrow_copy',
        'pixel_shuffle',
        'pixel_unshuffle',
        'select_backward',
        '_trilinear',
        'linalg_inv_ex',
        'linalg_pinv.atol_rtol_tensor',
        'logsumexp',
        "diag_embed",
        "block_diag",
        "new_empty_strided",
        "narrow_copy",
        "pixel_shuffle",
        "pixel_unshuffle",
        "select_backward",
        "_trilinear",
        "linalg_inv_ex",
        "linalg_pinv.atol_rtol_tensor",
        "logsumexp",
    }
    # For some ops, we don't support all variants. Here we use formatted_name
    # to uniquely identify the variant.
    SKIP_VARIANT_LIST = {
        'norm_nuc',
        'min_reduction_with_dim'
    }
    SKIP_VARIANT_LIST = {"norm_nuc", "min_reduction_with_dim"}

    return (LAZY_OPS_LIST,
            FALLBACK_LIST,
            SKIP_RUNTIME_ERROR_LIST,
            SKIP_INCORRECT_RESULTS_LIST,
            FUNCTIONAL_DECOMPOSE_LIST,
            HAS_SYMINT_SUFFIX,
            SKIP_VARIANT_LIST)
    return (
        LAZY_OPS_LIST,
        FALLBACK_LIST,
        SKIP_RUNTIME_ERROR_LIST,
        SKIP_INCORRECT_RESULTS_LIST,
        FUNCTIONAL_DECOMPOSE_LIST,
        HAS_SYMINT_SUFFIX,
        SKIP_VARIANT_LIST,
    )


(LAZY_OPS_LIST,
 FALLBACK_LIST,
 SKIP_RUNTIME_ERROR_LIST,
 SKIP_INCORRECT_RESULTS_LIST,
 FUNCTIONAL_DECOMPOSE_LIST,
 HAS_SYMINT_SUFFIX,
 SKIP_VARIANT_LIST) = init_lists()

(
    LAZY_OPS_LIST,
    FALLBACK_LIST,
    SKIP_RUNTIME_ERROR_LIST,
    SKIP_INCORRECT_RESULTS_LIST,
    FUNCTIONAL_DECOMPOSE_LIST,
    HAS_SYMINT_SUFFIX,
    SKIP_VARIANT_LIST,
) = init_lists()

torch.manual_seed(42)


def clone_move(t):
    dev = 'lazy'
    dev = "lazy"
    copy_t = t.detach().clone().requires_grad_(True).to(device=dev)
    return copy_t


class TestLazyTensor(JitTestCase):


    @skip("Disable until autograd supports symints")
    def testConvolutionBackward(self):
        test_device = get_test_device()
@@ -118,12 +134,15 @@ class TestLazyTensor(JitTestCase):

        # run eager
        conv_out = torch.nn.functional.conv2d(inp, weight, bias)
        (inp_grad, weight_grad, bias_grad) = torch.autograd.grad([conv_out], [inp, weight, bias], [grad])
        (inp_grad, weight_grad, bias_grad) = torch.autograd.grad(
            [conv_out], [inp, weight, bias], [grad]
        )

        # run lazy
        conv_copy_out = torch.nn.functional.conv2d(inp_copy, weight_copy, bias_copy)
        (inp_copy_grad, weight_copy_grad, bias_copy_grad) = torch.autograd.grad(
            [conv_copy_out], [inp_copy, weight_copy, bias_copy], [grad_copy])
            [conv_copy_out], [inp_copy, weight_copy, bias_copy], [grad_copy]
        )

        # check numerics
        torch.testing.assert_close(bias_copy_grad.cpu(), bias_grad.cpu())
@@ -148,7 +167,6 @@ class TestLazyTensor(JitTestCase):
            y.add_(1)
            return x


        out_ref = foo(inp, mark_step=False)
        out = foo(inp_lazy, mark_step=True)
        # out will have some pending mutations, which will be synced by the .cpu() call.
@@ -157,7 +175,7 @@ class TestLazyTensor(JitTestCase):
    def test_tensor_ctr(self):
        test_device = get_test_device()
        inp = torch.tensor([[1, 2, 3, 4, 5]], device=test_device)
        inp_lazy = torch.tensor([[1, 2, 3, 4, 5]], device='lazy')
        inp_lazy = torch.tensor([[1, 2, 3, 4, 5]], device="lazy")

        def foo(x):
            # Calling a view op to ensure that functionalization wrapping occurs.
@@ -169,19 +187,23 @@ class TestLazyTensor(JitTestCase):


class TestLazyOpInfo(TestCase):

    @ops([op for op in op_db
          if op.name in LAZY_OPS_LIST
          and op.name not in SKIP_RUNTIME_ERROR_LIST
          and op.name not in FUNCTIONAL_DECOMPOSE_LIST
          and op.formatted_name not in SKIP_VARIANT_LIST
          ], allowed_dtypes=(torch.float,))
    @ops(
        [
            op
            for op in op_db
            if op.name in LAZY_OPS_LIST
            and op.name not in SKIP_RUNTIME_ERROR_LIST
            and op.name not in FUNCTIONAL_DECOMPOSE_LIST
            and op.formatted_name not in SKIP_VARIANT_LIST
        ],
        allowed_dtypes=(torch.float,),
    )
    def test_dispatched_to_lazy(self, device, dtype, op):
        def get_name(op):
            l = [op.name]
            if op.variant_test_name != '':
            if op.variant_test_name != "":
                l.append(op.variant_test_name)
            return '.'.join(l)
            return ".".join(l)

        global HAS_SYMINT_SUFFIX, FALLBACK_LIST
        samples = op.sample_inputs("lazy", dtype, requires_grad=False)
@@ -197,20 +219,31 @@ class TestLazyOpInfo(TestCase):
        torch._lazy.wait_device_ops()
        prefix = "aten" if op.name in FALLBACK_LIST else "lazy"
        symint_suffix = "_symint" if op.name in HAS_SYMINT_SUFFIX else ""
        found = f"{prefix}::{op.name}{symint_suffix}" in remove_suffixes(torch._lazy.metrics.counter_names())
        found = f"{prefix}::{op.name}{symint_suffix}" in remove_suffixes(
            torch._lazy.metrics.counter_names()
        )
        # check aliases
        if not found:
            for alias in op.aliases:
                alias_found = f"{prefix}::{alias.name}{symint_suffix}" in remove_suffixes(torch._lazy.metrics.counter_names())
                alias_found = (
                    f"{prefix}::{alias.name}{symint_suffix}"
                    in remove_suffixes(torch._lazy.metrics.counter_names())
                )
                found = found or alias_found
                if found:
                    break
        self.assertTrue(found)


    @ops([op for op in op_db if op.name in LAZY_OPS_LIST and op.name not in SKIP_RUNTIME_ERROR_LIST | SKIP_INCORRECT_RESULTS_LIST], allowed_dtypes=(torch.float,))  # noqa: B950
    @ops(
        [
            op
            for op in op_db
            if op.name in LAZY_OPS_LIST
            and op.name not in SKIP_RUNTIME_ERROR_LIST | SKIP_INCORRECT_RESULTS_LIST
        ],
        allowed_dtypes=(torch.float,),
    )  # noqa: B950
    def test_correctness(self, device, dtype, op):

        test_device = get_test_device()

        def clone_to_device(input, dev):
@@ -224,7 +257,9 @@ class TestLazyOpInfo(TestCase):
            a, b = t
            self.assertEqual(type(a), type(b))
            if isinstance(a, torch.Tensor):
                self.assertTrue(torch.allclose(clone_to_device(a, test_device), b, atol=1e-4))
                self.assertTrue(
                    torch.allclose(clone_to_device(a, test_device), b, atol=1e-4)
                )

            if isinstance(a, Sequence):
                map(assert_allclose_rec, zip(a, b))
@@ -244,7 +279,15 @@ class TestLazyOpInfo(TestCase):
        torch._lazy.mark_step()
        assert_allclose_rec((r_actual, r_exp))

    @ops([op for op in op_db if op.name in LAZY_OPS_LIST and op.name not in SKIP_RUNTIME_ERROR_LIST | SKIP_INCORRECT_RESULTS_LIST], allowed_dtypes=(torch.float,))  # noqa: B950
    @ops(
        [
            op
            for op in op_db
            if op.name in LAZY_OPS_LIST
            and op.name not in SKIP_RUNTIME_ERROR_LIST | SKIP_INCORRECT_RESULTS_LIST
        ],
        allowed_dtypes=(torch.float,),
    )  # noqa: B950
    def test_correctness_with_reusing_ir(self, device, dtype, op):
        torch._lazy.config.set_reuse_ir(True)
        test_device = get_test_device()
@@ -260,7 +303,9 @@ class TestLazyOpInfo(TestCase):
            a, b = t
            self.assertEqual(type(a), type(b))
            if isinstance(a, torch.Tensor):
                self.assertTrue(torch.allclose(clone_to_device(a, test_device), b, atol=1e-4))
                self.assertTrue(
                    torch.allclose(clone_to_device(a, test_device), b, atol=1e-4)
                )

            if isinstance(a, Sequence):
                map(assert_allclose_rec, zip(a, b))
@@ -284,7 +329,6 @@ class TestLazyOpInfo(TestCase):
        torch._lazy.config.set_reuse_ir(False)



# TODO: after we move to master, add Lazy as a new Device here:
# https://github.com/pytorch/pytorch/blob/master/torch/testing/_internal/common_device_type.py#L532
instantiate_device_type_tests(TestLazyOpInfo, globals(), only_for="cpu")
@@ -306,7 +350,9 @@ class TestLazyDynamicOps(TestCase):
    def test_nonzero_dynamic(self):
        # Test that nonzero gives upper bounds sizes when symbolic shape mode is enabled
        test_device = get_test_device()
        x1 = torch.tensor([[0, 1.0, 2.0], [3.0, 0, 0]], device=test_device, requires_grad=True)
        x1 = torch.tensor(
            [[0, 1.0, 2.0], [3.0, 0, 0]], device=test_device, requires_grad=True
        )
        x1_lazy = clone_move(x1)
        x2_lazy = torch.nonzero(x1_lazy)

@@ -328,5 +374,6 @@ class TestLazyDynamicOps(TestCase):

        self.assertEqual(out_cpu.shape, out_lazy.shape)

if __name__ == '__main__':

if __name__ == "__main__":
    run_tests()
test/linear.py
@@ -1,4 +1,6 @@
import torch


class LinearMod(torch.nn.Linear):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
@@ -6,4 +8,5 @@ class LinearMod(torch.nn.Linear):
    def forward(self, input):
        return torch._C._nn.linear(input, self.weight, self.bias)


print(torch.jit.trace(LinearMod(20, 20), torch.rand([20, 20])).graph)
test/load_torchscript_model.py
@@ -1,7 +1,8 @@
import sys

import torch

if __name__ == '__main__':
if __name__ == "__main__":
    script_mod = torch.jit.load(sys.argv[1])
    mod = torch.load(sys.argv[1] + ".orig")
    print(script_mod)
test/mkl_verbose.py
@@ -1,13 +1,16 @@
import argparse

import torch


def run_model(level):
    m = torch.nn.Linear(20, 30)
    input = torch.randn(128, 20)
    with torch.backends.mkl.verbose(level):
        m(input)

if __name__ == '__main__':

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose-level", default=0, type=int)
    args = parser.parse_args()
test/mkldnn_verbose.py
@@ -1,6 +1,8 @@
import argparse

import torch


class Module(torch.nn.Module):
    def __init__(self):
        super().__init__()
@@ -10,13 +12,15 @@ class Module(torch.nn.Module):
        y = self.conv(x)
        return y


def run_model(level):
    m = Module().eval()
    d = torch.rand(1, 1, 112, 112)
    with torch.backends.mkldnn.verbose(level):
        m(d)

if __name__ == '__main__':

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--verbose-level", default=0, type=int)
    args = parser.parse_args()