Enable ufmt format on test files (#126845)

Fixes some of the files listed in #123062.

Ran lintrunner on the following files:

test/test_nnapi.py,
test/test_numba_integration.py,
test/test_numpy_interop.py,
test/test_openmp.py,
test/test_optim.py

```bash
$ lintrunner -a --take UFMT --all-files
ok No lint issues.
Successfully applied all patches.
```
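
The same check can also be scoped to just the touched files rather than the whole tree. A minimal sketch, assuming lintrunner's usual positional-path interface rather than `--all-files`:

```bash
# Hedged sketch: run only the UFMT linter on the five files from this PR,
# applying patches in place (-a), instead of sweeping the repo with --all-files.
lintrunner -a --take UFMT \
    test/test_nnapi.py \
    test/test_numba_integration.py \
    test/test_numpy_interop.py \
    test/test_openmp.py \
    test/test_optim.py
```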
Pull Request resolved: https://github.com/pytorch/pytorch/pull/126845
Approved by: https://github.com/ezyang
Author: hippocookie
Date: 2024-05-28 01:42:06 +00:00
Committed by: PyTorch MergeBot
Parent: 57000708fc
Commit: 8979412442

6 changed files with 800 additions and 406 deletions

.lintrunner.toml

@ -1103,13 +1103,6 @@ exclude_patterns = [
'test/test_native_mha.py',
'test/test_nestedtensor.py',
'test/test_nn.py',
'test/test_nnapi.py',
'test/test_numba_integration.py',
'test/test_numpy_interop.py',
'test/test_nvfuser_dynamo.py',
'test/test_nvfuser_frontend.py',
'test/test_openmp.py',
'test/test_optim.py',
'test/test_out_dtype_op.py',
'test/test_overrides.py',
'test/test_prims.py',
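
Dropping a file from this exclude list is what opts it into UFMT checks; the remaining diffs in this commit are the resulting reformat. The formatter can also be run on a file directly; a sketch, assuming the `ufmt` CLI (usort + black) that backs PyTorch's UFMT linter is on the PATH:

```bash
# Hedged sketch: ufmt combines usort (import sorting) with black (formatting).
# `ufmt diff` previews the changes; `ufmt format` rewrites the file in place.
ufmt diff test/test_nnapi.py
ufmt format test/test_nnapi.py
```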

test/test_nnapi.py

@ -1,14 +1,16 @@
#!/usr/bin/env python3
# Owner(s): ["oncall: mobile"]
import os
import ctypes
import torch
import os
import unittest
from typing import Tuple
import torch
from torch.backends._nnapi.prepare import convert_model_to_nnapi
from torch.testing._internal.common_quantized import supported_qengines
from torch.testing._internal.common_utils import TestCase, run_tests
from torch.testing._internal.common_utils import run_tests, TestCase
def qpt(t, scale, zero_point, dtype=torch.quint8):
t = torch.tensor(t)
@ -21,13 +23,14 @@ def nhwc(t):
return t
@unittest.skipUnless('qnnpack' in supported_qengines,
"This Pytorch Build has not been built with or does not support QNNPACK")
@unittest.skipUnless(
"qnnpack" in supported_qengines,
"This Pytorch Build has not been built with or does not support QNNPACK",
)
class TestNNAPI(TestCase):
def setUp(self):
# Avoid saturation in fbgemm
torch.backends.quantized.engine = 'qnnpack'
torch.backends.quantized.engine = "qnnpack"
libneuralnetworks_path = os.environ.get("LIBNEURALNETWORKS_PATH")
if libneuralnetworks_path:
@ -54,7 +57,7 @@ class TestNNAPI(TestCase):
convert_args=None,
atol_rtol=None,
limit=None,
expected_memory_format=None
expected_memory_format=None,
):
with torch.no_grad():
if isinstance(arg_or_args, torch.Tensor):
@ -75,15 +78,17 @@ class TestNNAPI(TestCase):
kwargs["rtol"] = atol_rtol[1]
self.assertEqual(eager_output, nnapi_output, **kwargs)
if limit is not None:
mismatches = \
eager_output.int_repr().to(torch.int32) - \
nnapi_output.int_repr().to(torch.int32)
mismatches = eager_output.int_repr().to(
torch.int32
) - nnapi_output.int_repr().to(torch.int32)
if mismatches.count_nonzero() > limit:
# Too many mismatches. Re-run the check with no tolerance
# to get a nice message.
self.assertEqual(eager_output, nnapi_output, atol=0, rtol=0)
if expected_memory_format:
self.assertTrue(nnapi_output.is_contiguous(memory_format=expected_memory_format))
self.assertTrue(
nnapi_output.is_contiguous(memory_format=expected_memory_format)
)
def float_and_quant_and_nhwc(self, inp_float, scale, zero_point):
torch.manual_seed(29)
@ -101,7 +106,7 @@ class TestNNAPI(TestCase):
self.check(single_a, arg)
multi_a = torch.nn.PReLU(4)
with torch.no_grad():
multi_a.weight.copy_(torch.tensor([.1, .2, .3, .4]))
multi_a.weight.copy_(torch.tensor([0.1, 0.2, 0.3, 0.4]))
self.check(multi_a, nhwc(arg))
# Test flexible size
@ -115,12 +120,13 @@ class TestNNAPI(TestCase):
def test_quantize(self):
self.check(
torch.ao.nn.quantized.Quantize(0.25, 2, torch.quint8),
nhwc(torch.tensor([[[[1.0]], [[2.0]]]])))
nhwc(torch.tensor([[[[1.0]], [[2.0]]]])),
)
def test_dequantize(self):
self.check(
torch.ao.nn.quantized.DeQuantize(),
nhwc(qpt([[[[1.0]], [[2.0]]]], 0.25, 2)))
torch.ao.nn.quantized.DeQuantize(), nhwc(qpt([[[[1.0]], [[2.0]]]], 0.25, 2))
)
def test_unsqueeze(self):
class UnsqueezeModule(torch.nn.Module):
@ -146,18 +152,12 @@ class TestNNAPI(TestCase):
def forward(self, arg):
return arg.reshape(self.shape)
self.check(
ReshapeModule((2, 4)),
torch.randn(4, 2, 1, 1))
self.check(ReshapeModule((2, 4)), torch.randn(4, 2, 1, 1))
self.check(
ReshapeModule((8, -1)),
nhwc(torch.randn(4, 2, 1, 1)))
self.check(ReshapeModule((8, -1)), nhwc(torch.randn(4, 2, 1, 1)))
with self.assertRaisesRegex(Exception, "target size"):
self.check(
ReshapeModule((2, 4)),
nhwc(torch.randn(4, 2, 1, 1)))
self.check(ReshapeModule((2, 4)), nhwc(torch.randn(4, 2, 1, 1)))
def test_flatten(self):
for mod in [
@ -165,8 +165,7 @@ class TestNNAPI(TestCase):
torch.nn.Flatten(start_dim=2, end_dim=3),
torch.nn.Flatten(start_dim=2, end_dim=4),
torch.nn.Flatten(start_dim=0, end_dim=-2),
torch.nn.Flatten(start_dim=0, end_dim=4)
torch.nn.Flatten(start_dim=0, end_dim=4),
]:
self.check(mod, torch.randn(4, 2, 1, 3, 7))
@ -174,31 +173,24 @@ class TestNNAPI(TestCase):
self.check(
torch.nn.Flatten(),
torch.randn(4, 2, 1, 3, 7),
convert_args=[torch.zeros(0, 2, 1, 3, 7)]
convert_args=[torch.zeros(0, 2, 1, 3, 7)],
)
# channels last
self.check(
torch.nn.Flatten(),
nhwc(torch.randn(2, 1, 4, 7))
)
self.check(
torch.nn.Flatten(),
nhwc(torch.randn(2, 3, 1, 1))
)
self.check(torch.nn.Flatten(), nhwc(torch.randn(2, 1, 4, 7)))
self.check(torch.nn.Flatten(), nhwc(torch.randn(2, 3, 1, 1)))
# Exceptions
with self.assertRaisesRegex(Exception, "not supported on NHWC"):
self.check(
torch.nn.Flatten(),
nhwc(torch.randn(1, 3, 4, 4))
)
with self.assertRaisesRegex(Exception, "Flattening flexible dims is not supported yet"):
self.check(torch.nn.Flatten(), nhwc(torch.randn(1, 3, 4, 4)))
with self.assertRaisesRegex(
Exception, "Flattening flexible dims is not supported yet"
):
self.check(torch.nn.Flatten(), torch.randn(4, 2, 0, 0, 7))
with self.assertRaisesRegex(Exception, "Only 1 dim"):
self.check(
torch.nn.Flatten(start_dim=1, end_dim=-2),
torch.randn(0, 2, 1, 3, 0))
torch.nn.Flatten(start_dim=1, end_dim=-2), torch.randn(0, 2, 1, 3, 0)
)
def test_slice(self):
class SliceModule(torch.nn.Module):
@ -209,32 +201,26 @@ class TestNNAPI(TestCase):
self.step = step
def forward(self, t):
return t[1:, self.start:self.stop:self.step, :]
return t[1:, self.start : self.stop : self.step, :]
class SliceModule2(torch.nn.Module):
def forward(self, t):
return t[3:]
self.check(
SliceModule(1, 5, 2),
torch.randn(4, 6, 2)
)
self.check(
SliceModule2(),
torch.randn(5)
)
self.check(SliceModule(1, 5, 2), torch.randn(4, 6, 2))
self.check(SliceModule2(), torch.randn(5))
# flex inputs
self.check(
SliceModule(1, 5, 2),
torch.randn(4, 6, 2),
convert_args=[torch.zeros(4, 6, 0)]
convert_args=[torch.zeros(4, 6, 0)],
)
with self.assertRaisesRegex(Exception, "slice with flexible shape"):
self.check(
SliceModule(1, 5, 2),
torch.randn(4, 6, 2),
convert_args=[torch.zeros(0, 0, 0)]
convert_args=[torch.zeros(0, 0, 0)],
)
def test_cat(self):
@ -251,21 +237,8 @@ class TestNNAPI(TestCase):
[
torch.randn(1, 2, 3, 3),
torch.randn(2, 2, 3, 3),
])
self.check(
CatModule(1),
[
torch.randn(1, 2, 3, 3),
torch.randn(1, 4, 3, 3),
])
self.check(
CatModule(1),
[
nhwc(torch.randn(1, 2, 3, 3)),
nhwc(torch.randn(1, 4, 3, 3)),
])
],
)
self.check(
CatModule(1),
@ -273,14 +246,29 @@ class TestNNAPI(TestCase):
torch.randn(1, 2, 3, 3),
torch.randn(1, 4, 3, 3),
],
convert_args=[
torch.zeros(0, 0, 0, 0),
torch.zeros(0, 0, 0, 0)
])
)
self.check(
CatModule(1),
[
nhwc(torch.randn(1, 2, 3, 3)),
nhwc(torch.randn(1, 4, 3, 3)),
],
)
self.check(
CatModule(1),
[
torch.randn(1, 2, 3, 3),
torch.randn(1, 4, 3, 3),
],
convert_args=[torch.zeros(0, 0, 0, 0), torch.zeros(0, 0, 0, 0)],
)
def test_pointwise_unary(self):
for op in ["relu", "sigmoid"]:
with self.subTest(op):
class UnaryModule(torch.nn.Module):
def forward(self, arg):
if op == "relu":
@ -288,15 +276,17 @@ class TestNNAPI(TestCase):
if op == "sigmoid":
return torch.sigmoid(arg)
raise Exception("Bad op") # noqa: TRY002
self.check(UnaryModule(), torch.tensor([-1.0, 1.0]))
self.check(
UnaryModule(),
qpt(torch.tensor([-1.0, 1.0]), 1. / 256, 0),
qpt(torch.tensor([-1.0, 1.0]), 1.0 / 256, 0),
)
def test_pointwise_binary(self):
for op in ["add", "sub", "mul", "div"]:
with self.subTest(op):
class BinaryModule(torch.nn.Module):
def forward(self, lhs, rhs):
if op == "add":
@ -314,14 +304,16 @@ class TestNNAPI(TestCase):
[
torch.tensor([1.0, 2.0]),
torch.tensor([3.0, 4.0]),
])
],
)
self.check(
BinaryModule(),
[
torch.tensor([[1.0, 2.0]]),
torch.tensor([[3.0, 4.0], [5.0, 6.0]]),
])
],
)
with self.assertRaisesRegex(Exception, "Non-equal-rank broadcast"):
self.check(
@ -329,7 +321,8 @@ class TestNNAPI(TestCase):
[
torch.tensor([1.0, 2.0]),
torch.tensor([[3.0, 4.0], [5.0, 6.0]]),
])
],
)
def test_pointwise_binary_const(self):
const = torch.randn(1, 4, 6, 6)
@ -349,9 +342,10 @@ class TestNNAPI(TestCase):
for use_nhwc in [False, True]:
with self.subTest(mod_class=mod_class.__name__, use_nhwc=use_nhwc):
arg = arg_nhwc if use_nhwc else arg_contig
memory_format = torch.channels_last if use_nhwc else torch.contiguous_format
self.check(mod_class(), arg,
expected_memory_format=memory_format)
memory_format = (
torch.channels_last if use_nhwc else torch.contiguous_format
)
self.check(mod_class(), arg, expected_memory_format=memory_format)
def test_hardtanh(self):
inp = torch.tensor([-2.0, -0.5, 0.5, 2.0, 7.0])
@ -399,8 +393,10 @@ class TestNNAPI(TestCase):
self.check(DetachModule(), torch.randn(1, 2, 3, 3))
self.check(
DetachModule(), torch.randn(1, 2, 3, 3),
convert_args=[torch.zeros(1, 2, 0, 0)])
DetachModule(),
torch.randn(1, 2, 3, 3),
convert_args=[torch.zeros(1, 2, 0, 0)],
)
def test_log_softmax(self):
inp = torch.randn(3, 10)
@ -425,14 +421,18 @@ class TestNNAPI(TestCase):
self.check(MeanModule([-1, -2], keep=True), nhwc(torch.randn(2, 3, 6, 6)))
def test_max_pool2d(self):
for (name, inp) in self.float_and_quant_and_nhwc(torch.randn(2, 3, 12, 16), 0.3, 128):
for name, inp in self.float_and_quant_and_nhwc(
torch.randn(2, 3, 12, 16), 0.3, 128
):
with self.subTest(name):
self.check(torch.nn.MaxPool2d(2), inp)
self.check(torch.nn.MaxPool2d((3, 4)), inp)
self.check(torch.nn.MaxPool2d((3, 4), (1, 2)), inp)
def test_avg_pool2d(self):
for (name, inp) in self.float_and_quant_and_nhwc(torch.randn(2, 3, 12, 16), 0.3, 128):
for name, inp in self.float_and_quant_and_nhwc(
torch.randn(2, 3, 12, 16), 0.3, 128
):
with self.subTest(name):
atol_rtol = None
limit = None
@ -440,9 +440,10 @@ class TestNNAPI(TestCase):
convert_arg = torch.zeros(*convert_dims)
for model in (
torch.nn.AvgPool2d(2),
torch.nn.AvgPool2d((3, 4)),
torch.nn.AvgPool2d((3, 4), (1, 2))):
torch.nn.AvgPool2d(2),
torch.nn.AvgPool2d((3, 4)),
torch.nn.AvgPool2d((3, 4), (1, 2)),
):
if "quant" in name:
atol_rtol = (1, 0)
limit = model(inp).numel()
@ -456,19 +457,25 @@ class TestNNAPI(TestCase):
inp,
convert_args=[convert_arg],
atol_rtol=atol_rtol,
limit=limit
limit=limit,
)
def test_adaptive_avg_pool2d(self):
for (name, inp) in self.float_and_quant_and_nhwc(torch.randn(2, 3, 12, 16), 0.3, 128):
for name, inp in self.float_and_quant_and_nhwc(
torch.randn(2, 3, 12, 16), 0.3, 128
):
with self.subTest(name):
self.check(torch.nn.AdaptiveAvgPool2d((1, 1)), inp)
with self.assertRaisesRegex(Exception, "with output size"):
self.check(torch.nn.AdaptiveAvgPool2d((2, 2)), inp)
def test_upsample_nearest2d(self):
convert_args = dict(self.float_and_quant_and_nhwc(torch.randn(2, 3, 0, 0), 0.3, 128))
for (name, inp) in self.float_and_quant_and_nhwc(torch.randn(2, 3, 12, 16), 0.3, 128):
convert_args = dict(
self.float_and_quant_and_nhwc(torch.randn(2, 3, 0, 0), 0.3, 128)
)
for name, inp in self.float_and_quant_and_nhwc(
torch.randn(2, 3, 12, 16), 0.3, 128
):
with self.subTest(name):
self.check(torch.nn.UpsamplingNearest2d(size=(16, 20)), inp)
self.check(torch.nn.UpsamplingNearest2d(size=(24, 32)), inp)
@ -478,39 +485,61 @@ class TestNNAPI(TestCase):
self.check(torch.nn.UpsamplingNearest2d(scale_factor=(3.0, 3.0)), inp)
self.check(
torch.nn.UpsamplingNearest2d(size=(24, 32)), inp,
convert_args=[convert_args[name]]
torch.nn.UpsamplingNearest2d(size=(24, 32)),
inp,
convert_args=[convert_args[name]],
)
self.check(
torch.nn.UpsamplingNearest2d(scale_factor=(2.0, 2.0)), inp,
convert_args=[convert_args[name]]
torch.nn.UpsamplingNearest2d(scale_factor=(2.0, 2.0)),
inp,
convert_args=[convert_args[name]],
)
def test_linear(self):
torch.manual_seed(29)
self.check(torch.nn.Linear(16, 32), torch.randn(2, 16))
self.check(
torch.nn.Linear(16, 32), torch.randn(2, 16),
convert_args=[torch.zeros(0, 16)])
torch.nn.Linear(16, 32),
torch.randn(2, 16),
convert_args=[torch.zeros(0, 16)],
)
def test_conv2d(self):
cases = [
# in_ch, out_ch, kernel, stride, padding, groups, bias, input_dim, name
( 4, 8, (3, 3), 1, 0, 1, 1, (2, 4, 16, 16), "3x3"), # noqa: E201,E241
( 4, 8, (3, 3), 1, 0, 1, 0, (2, 4, 16, 16), "3x3nobias"), # noqa: E201,E241
( 4, 16, (3, 3), 1, 1, 1, 1, (2, 4, 16, 16), "3x3p1"), # noqa: E201,E241
( 8, 8, (3, 3), 2, 0, 1, 1, (2, 8, 16, 16), "3x3s2"), # noqa: E201,E241
( 4, 8, (5, 5), 1, 0, 1, 1, (2, 4, 16, 16), "5x5"), # noqa: E201,E241
( 4, 4, (3, 3), 1, 0, 4, 1, (2, 4, 16, 16), "3x3dw"), # noqa: E201,E241
( 8, 4, (1, 1), 1, 0, 1, 1, (2, 8, 16, 16), "1x1"), # noqa: E201,E241
(4, 8, (3, 3), 1, 0, 1, 1, (2, 4, 16, 16), "3x3"), # noqa: E201,E241
(4, 8, (3, 3), 1, 0, 1, 0, (2, 4, 16, 16), "3x3nobias"), # noqa: E201,E241
(4, 16, (3, 3), 1, 1, 1, 1, (2, 4, 16, 16), "3x3p1"), # noqa: E201,E241
(8, 8, (3, 3), 2, 0, 1, 1, (2, 8, 16, 16), "3x3s2"), # noqa: E201,E241
(4, 8, (5, 5), 1, 0, 1, 1, (2, 4, 16, 16), "5x5"), # noqa: E201,E241
(4, 4, (3, 3), 1, 0, 4, 1, (2, 4, 16, 16), "3x3dw"), # noqa: E201,E241
(8, 4, (1, 1), 1, 0, 1, 1, (2, 8, 16, 16), "1x1"), # noqa: E201,E241
]
for kind in ["float", "float-nhwc", "quant", "quant-nhwc"]:
for case in cases:
in_ch, out_ch, kernel, stride, padding, groups, bias, input_dim, name = case
(
in_ch,
out_ch,
kernel,
stride,
padding,
groups,
bias,
input_dim,
name,
) = case
with self.subTest(f"{kind}-{name}"):
inp = torch.randn(input_dim)
model = torch.nn.Conv2d(in_ch, out_ch, kernel, stride, padding, groups=groups, bias=bool(bias))
model = torch.nn.Conv2d(
in_ch,
out_ch,
kernel,
stride,
padding,
groups=groups,
bias=bool(bias),
)
output_size = model(inp).numel()
atol_rtol = None
limit = None
@ -520,7 +549,9 @@ class TestNNAPI(TestCase):
if "quant" in kind:
model = torch.nn.Sequential(model)
model.eval()
model.qconfig = torch.ao.quantization.get_default_qconfig('qnnpack')
model.qconfig = torch.ao.quantization.get_default_qconfig(
"qnnpack"
)
model = torch.ao.quantization.prepare(model)
model(inp)
model = torch.ao.quantization.convert(model)
@ -542,7 +573,7 @@ class TestNNAPI(TestCase):
inp,
convert_args=[convert_arg],
atol_rtol=atol_rtol,
limit=limit
limit=limit,
)
def test_conv2d_transpose(self):
@ -562,7 +593,7 @@ class TestNNAPI(TestCase):
if "quant" in kind:
model = torch.ao.nn.quantized.ConvTranspose2d(in_ch, out_ch, kernel)
model.qconfig = torch.ao.quantization.get_default_qconfig('qnnpack')
model.qconfig = torch.ao.quantization.get_default_qconfig("qnnpack")
inp = qpt(inp, 1.0 / 16, 128)
# I've seen numerical differences between QNNPACK and NNAPI,
# but never more than 1 quantum, and never more than ~10% of
@ -581,10 +612,9 @@ class TestNNAPI(TestCase):
inp,
convert_args=[convert_arg],
atol_rtol=atol_rtol,
limit=limit
limit=limit,
)
def test_qadd(self):
func = torch.ao.nn.quantized.QFunctional()
func.scale = 0.5
@ -602,14 +632,15 @@ class TestNNAPI(TestCase):
def forward(self, lhs, rhs):
return func.mul(lhs, rhs)
for (name, mod) in [("add", AddMod), ("add_relu", AddReluMod), ("mul", MulMod)]:
for name, mod in [("add", AddMod), ("add_relu", AddReluMod), ("mul", MulMod)]:
with self.subTest(name):
self.check(
mod(),
[
qpt([1.0, 2.0], 0.25, 128),
qpt([3.0, 4.0], 0.25, 128),
])
],
)
self.check(
mod(),
[
@ -619,7 +650,7 @@ class TestNNAPI(TestCase):
convert_args=[
qpt([[1.0, 2.0]], 0.25, 128),
qpt(torch.zeros((1, 2)), 0.25, 128),
]
],
)
self.check(
mod(),
@ -630,7 +661,7 @@ class TestNNAPI(TestCase):
convert_args=[
qpt(torch.zeros((1, 2)), 0.25, 128),
qpt([[3.0, 4.0]], 0.25, 128),
]
],
)
self.check(
mod(),
@ -641,7 +672,7 @@ class TestNNAPI(TestCase):
convert_args=[
qpt(torch.zeros((1, 2)), 0.25, 128),
qpt(torch.zeros((1, 2)), 0.25, 128),
]
],
)
# NOTE: NNAPI qadd supports broadcast, but PT does not.
@ -664,7 +695,8 @@ class TestNNAPI(TestCase):
[
nhwc(torch.randn(2, 3, 4, 4)),
torch.randn(1, 3, 1, 1),
])
],
)
def test_multi_output(self):
class MultiModel(torch.nn.Module):
@ -676,5 +708,5 @@ class TestNNAPI(TestCase):
self.check(MultiModel(), [torch.tensor([1.0, 2.0]), torch.tensor([1.0, 3.0])])
if __name__ == '__main__':
if __name__ == "__main__":
run_tests()

test/test_numba_integration.py

@ -2,12 +2,16 @@
import unittest
import torch.testing._internal.common_utils as common
from torch.testing._internal.common_utils import TEST_NUMPY
from torch.testing._internal.common_cuda import TEST_NUMBA_CUDA, TEST_CUDA, TEST_MULTIGPU
import torch
import torch.testing._internal.common_utils as common
from torch.testing._internal.common_cuda import (
TEST_CUDA,
TEST_MULTIGPU,
TEST_NUMBA_CUDA,
)
from torch.testing._internal.common_utils import TEST_NUMPY
if TEST_NUMPY:
import numpy
@ -56,7 +60,6 @@ class TestNumbaIntegration(common.TestCase):
numpy.uint8,
]
for tp, npt in zip(types, dtypes):
# CPU tensors do not implement the interface.
cput = tp(10)
@ -117,7 +120,6 @@ class TestNumbaIntegration(common.TestCase):
]
for dt in torch_dtypes:
# CPU tensors of all types do not register as cuda arrays,
# attempts to convert raise a type error.
cput = torch.arange(10).to(dt)
@ -231,7 +233,9 @@ class TestNumbaIntegration(common.TestCase):
numba.cuda.as_cuda_array(cudat), numba.cuda.devicearray.DeviceNDArray
)
@unittest.skip("Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418")
@unittest.skip(
"Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418"
)
@unittest.skipIf(not TEST_NUMPY, "No numpy")
@unittest.skipIf(not TEST_CUDA, "No cuda")
@unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
@ -260,39 +264,59 @@ class TestNumbaIntegration(common.TestCase):
numpy_arys = [
numpy.ones((), dtype=dtype),
numpy.arange(6).reshape(2, 3).astype(dtype),
numpy.arange(6).reshape(2, 3).astype(dtype)[1:], # View offset should be ignored
numpy.arange(6).reshape(2, 3).astype(dtype)[:, None], # change the strides but still contiguous
numpy.arange(6)
.reshape(2, 3)
.astype(dtype)[1:], # View offset should be ignored
numpy.arange(6)
.reshape(2, 3)
.astype(dtype)[:, None], # change the strides but still contiguous
]
# Zero-copy when using `torch.as_tensor()`
for numpy_ary in numpy_arys:
numba_ary = numba.cuda.to_device(numpy_ary)
torch_ary = torch.as_tensor(numba_ary, device="cuda")
self.assertEqual(numba_ary.__cuda_array_interface__, torch_ary.__cuda_array_interface__)
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype))
self.assertEqual(
numba_ary.__cuda_array_interface__,
torch_ary.__cuda_array_interface__,
)
self.assertEqual(
torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype)
)
# Check that `torch_ary` and `numba_ary` points to the same device memory
torch_ary += 42
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype))
self.assertEqual(
torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype)
)
# Implicit-copy because `torch_ary` is a CPU array
for numpy_ary in numpy_arys:
numba_ary = numba.cuda.to_device(numpy_ary)
torch_ary = torch.as_tensor(numba_ary, device="cpu")
self.assertEqual(torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype))
self.assertEqual(
torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype)
)
# Check that `torch_ary` and `numba_ary` points to different memory
torch_ary += 42
self.assertEqual(torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype) + 42)
self.assertEqual(
torch_ary.data.numpy(), numpy.asarray(numba_ary, dtype=dtype) + 42
)
# Explicit-copy when using `torch.tensor()`
for numpy_ary in numpy_arys:
numba_ary = numba.cuda.to_device(numpy_ary)
torch_ary = torch.tensor(numba_ary, device="cuda")
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype))
self.assertEqual(
torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype)
)
# Check that `torch_ary` and `numba_ary` points to different memory
torch_ary += 42
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary, dtype=dtype) + 42)
self.assertEqual(
torch_ary.cpu().data.numpy(),
numpy.asarray(numba_ary, dtype=dtype) + 42,
)
@unittest.skipIf(not TEST_NUMPY, "No numpy")
@unittest.skipIf(not TEST_CUDA, "No cuda")
@ -318,7 +342,9 @@ class TestNumbaIntegration(common.TestCase):
torch_ary = torch.as_tensor(numba_ary, device="cuda")
self.assertTrue(torch_ary.is_contiguous())
@unittest.skip("Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418")
@unittest.skip(
"Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418"
)
@unittest.skipIf(not TEST_NUMPY, "No numpy")
@unittest.skipIf(not TEST_CUDA, "No cuda")
@unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
@ -326,11 +352,17 @@ class TestNumbaIntegration(common.TestCase):
"""torch.as_tensor(obj) tensor grabs a reference to obj so that the lifetime of obj exceeds the tensor"""
numba_ary = numba.cuda.to_device(numpy.arange(6))
torch_ary = torch.as_tensor(numba_ary, device="cuda")
self.assertEqual(torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__) # No copy
self.assertEqual(
torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__
) # No copy
del numba_ary
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.arange(6)) # `torch_ary` is still alive
self.assertEqual(
torch_ary.cpu().data.numpy(), numpy.arange(6)
) # `torch_ary` is still alive
@unittest.skip("Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418")
@unittest.skip(
"Test is temporary disabled, see https://github.com/pytorch/pytorch/issues/54418"
)
@unittest.skipIf(not TEST_NUMPY, "No numpy")
@unittest.skipIf(not TEST_CUDA, "No cuda")
@unittest.skipIf(not TEST_NUMBA_CUDA, "No numba.cuda")
@ -342,7 +374,9 @@ class TestNumbaIntegration(common.TestCase):
numba_ary = numba.cuda.to_device(numpy.arange(6))
torch_ary = torch.as_tensor(numba_ary, device="cuda")
self.assertEqual(torch_ary.cpu().data.numpy(), numpy.asarray(numba_ary))
self.assertEqual(torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__)
self.assertEqual(
torch_ary.__cuda_array_interface__, numba_ary.__cuda_array_interface__
)
# Implicit-copy: when the Numba and Torch device differ
numba_ary = numba.cuda.to_device(numpy.arange(6))

test/test_numpy_interop.py

@ -2,18 +2,22 @@
# Owner(s): ["module: numpy"]
import torch
import numpy as np
from itertools import product
import sys
from torch.testing._internal.common_utils import \
(skipIfTorchDynamo, TestCase, run_tests)
from torch.testing._internal.common_device_type import \
(instantiate_device_type_tests, onlyCPU, dtypes, skipMeta)
from torch.testing._internal.common_dtype import all_types_and_complex_and
from itertools import product
import numpy as np
import torch
from torch.testing import make_tensor
from torch.testing._internal.common_device_type import (
dtypes,
instantiate_device_type_tests,
onlyCPU,
skipMeta,
)
from torch.testing._internal.common_dtype import all_types_and_complex_and
from torch.testing._internal.common_utils import run_tests, skipIfTorchDynamo, TestCase
# For testing handling NumPy objects and sending tensors to / accepting
@ -25,7 +29,7 @@ class TestNumPyInterop(TestCase):
@onlyCPU
def test_numpy_non_writeable(self, device):
arr = np.zeros(5)
arr.flags['WRITEABLE'] = False
arr.flags["WRITEABLE"] = False
self.assertWarns(UserWarning, lambda: torch.from_numpy(arr))
@onlyCPU
@ -103,7 +107,7 @@ class TestNumPyInterop(TestCase):
x = get_castable_tensor((sz1, sz2), dtp)
y = x.numpy()
check2d(x, y)
self.assertTrue(y.flags['C_CONTIGUOUS'])
self.assertTrue(y.flags["C_CONTIGUOUS"])
# with storage offset
xm = get_castable_tensor((sz1 * 2, sz2), dtp)
@ -111,13 +115,13 @@ class TestNumPyInterop(TestCase):
y = x.numpy()
self.assertTrue(x.storage_offset() > 0)
check2d(x, y)
self.assertTrue(y.flags['C_CONTIGUOUS'])
self.assertTrue(y.flags["C_CONTIGUOUS"])
# non-contiguous 2D
x = get_castable_tensor((sz2, sz1), dtp).t()
y = x.numpy()
check2d(x, y)
self.assertFalse(y.flags['C_CONTIGUOUS'])
self.assertFalse(y.flags["C_CONTIGUOUS"])
# with storage offset
xm = get_castable_tensor((sz2 * 2, sz1), dtp)
@ -168,7 +172,9 @@ class TestNumPyInterop(TestCase):
for sparse in [False, True]:
for conj in [False, True]:
data = [[1 + 2j, -2 + 3j], [-1 - 2j, 3 - 2j]]
x = torch.tensor(data, requires_grad=requires_grad, device=device)
x = torch.tensor(
data, requires_grad=requires_grad, device=device
)
y = x
if sparse:
if requires_grad:
@ -177,13 +183,23 @@ class TestNumPyInterop(TestCase):
if conj:
x = x.conj()
y = x.resolve_conj()
expect_error = requires_grad or sparse or conj or not device == 'cpu'
expect_error = (
requires_grad or sparse or conj or not device == "cpu"
)
error_msg = r"Use (t|T)ensor\..*(\.numpy\(\))?"
if not force and expect_error:
self.assertRaisesRegex((RuntimeError, TypeError), error_msg, lambda: x.numpy())
self.assertRaisesRegex((RuntimeError, TypeError), error_msg, lambda: x.numpy(force=False))
self.assertRaisesRegex(
(RuntimeError, TypeError), error_msg, lambda: x.numpy()
)
self.assertRaisesRegex(
(RuntimeError, TypeError),
error_msg,
lambda: x.numpy(force=False),
)
elif force and sparse:
self.assertRaisesRegex(TypeError, error_msg, lambda: x.numpy(force=True))
self.assertRaisesRegex(
TypeError, error_msg, lambda: x.numpy(force=True)
)
else:
self.assertEqual(x.numpy(force=force), y)
@ -224,7 +240,7 @@ class TestNumPyInterop(TestCase):
self.assertEqual(tensor_from_array2[i], array2[i])
# Test unsupported type
array = np.array(['foo', 'bar'], dtype=np.dtype(np.str_))
array = np.array(["foo", "bar"], dtype=np.dtype(np.str_))
with self.assertRaises(TypeError):
tensor_from_array = torch.from_numpy(array)
@ -255,7 +271,7 @@ class TestNumPyInterop(TestCase):
self.assertEqual(torch.from_numpy(x).shape, (2, 0))
# check ill-sized strides raise exception
x = np.array([3., 5., 8.])
x = np.array([3.0, 5.0, 8.0])
x.strides = (3,)
self.assertRaises(ValueError, lambda: torch.from_numpy(x))
@ -263,7 +279,7 @@ class TestNumPyInterop(TestCase):
def test_from_numpy_no_leak_on_invalid_dtype(self):
# This used to leak memory as the `from_numpy` call raised an exception and didn't decref the temporary
# object. See https://github.com/pytorch/pytorch/issues/121138
x = np.array("value".encode('ascii'))
x = np.array("value".encode("ascii"))
for _ in range(1000):
try:
torch.from_numpy(x)
@ -273,31 +289,52 @@ class TestNumPyInterop(TestCase):
@skipMeta
def test_from_list_of_ndarray_warning(self, device):
warning_msg = r"Creating a tensor from a list of numpy.ndarrays is extremely slow"
warning_msg = (
r"Creating a tensor from a list of numpy.ndarrays is extremely slow"
)
with self.assertWarnsOnceRegex(UserWarning, warning_msg):
torch.tensor([np.array([0]), np.array([1])], device=device)
def test_ctor_with_invalid_numpy_array_sequence(self, device):
# Invalid list of numpy array
with self.assertRaisesRegex(ValueError, "expected sequence of length"):
torch.tensor([np.random.random(size=(3, 3)), np.random.random(size=(3, 0))], device=device)
torch.tensor(
[np.random.random(size=(3, 3)), np.random.random(size=(3, 0))],
device=device,
)
# Invalid list of list of numpy array
with self.assertRaisesRegex(ValueError, "expected sequence of length"):
torch.tensor([[np.random.random(size=(3, 3)), np.random.random(size=(3, 2))]], device=device)
torch.tensor(
[[np.random.random(size=(3, 3)), np.random.random(size=(3, 2))]],
device=device,
)
with self.assertRaisesRegex(ValueError, "expected sequence of length"):
torch.tensor([[np.random.random(size=(3, 3)), np.random.random(size=(3, 3))],
[np.random.random(size=(3, 3)), np.random.random(size=(3, 2))]], device=device)
torch.tensor(
[
[np.random.random(size=(3, 3)), np.random.random(size=(3, 3))],
[np.random.random(size=(3, 3)), np.random.random(size=(3, 2))],
],
device=device,
)
# expected shape is `[1, 2, 3]`, hence we try to iterate over 0-D array
# leading to type error : not a sequence.
with self.assertRaisesRegex(TypeError, "not a sequence"):
torch.tensor([[np.random.random(size=(3)), np.random.random()]], device=device)
torch.tensor(
[[np.random.random(size=(3)), np.random.random()]], device=device
)
# list of list or numpy array.
with self.assertRaisesRegex(ValueError, "expected sequence of length"):
torch.tensor([[1, 2, 3], np.random.random(size=(2,)), ], device=device)
torch.tensor(
[
[1, 2, 3],
np.random.random(size=(2,)),
],
device=device,
)
@onlyCPU
def test_ctor_with_numpy_scalar_ctor(self, device) -> None:
@ -326,7 +363,7 @@ class TestNumPyInterop(TestCase):
def test_numpy_index_multi(self, device):
for dim_sz in [2, 8, 16, 32]:
i = np.zeros((dim_sz, dim_sz, dim_sz), dtype=np.int32)
i[:dim_sz // 2, :, :] = 1
i[: dim_sz // 2, :, :] = 1
x = torch.randn(dim_sz, dim_sz, dim_sz)
self.assertTrue(x[i == 1].numel() == np.sum(i))
@ -352,7 +389,7 @@ class TestNumPyInterop(TestCase):
]
for tp, dtype in zip(types, dtypes):
# Only concrete class can be given where "Type[number[_64Bit]]" is expected
if np.dtype(dtype).kind == 'u': # type: ignore[misc]
if np.dtype(dtype).kind == "u": # type: ignore[misc]
# .type expects a XxxTensor, which have no type hints on
# purpose, so ignore during mypy type checking
x = torch.tensor([1, 2, 3, 4]).type(tp) # type: ignore[call-overload]
@ -381,7 +418,7 @@ class TestNumPyInterop(TestCase):
asarray = np.asarray(x, dtype=dtype)
self.assertEqual(asarray.dtype, dtype)
# Only concrete class can be given where "Type[number[_64Bit]]" is expected
if np.dtype(dtype).kind == 'u': # type: ignore[misc]
if np.dtype(dtype).kind == "u": # type: ignore[misc]
wrapped_x = np.array([1, -2, 3, -4], dtype=dtype)
for i in range(len(x)):
self.assertEqual(asarray[i], wrapped_x[i])
@ -395,7 +432,7 @@ class TestNumPyInterop(TestCase):
for tp, dtype in zip(float_types, float_dtypes):
x = torch.tensor([1, 2, 3, 4]).type(tp) # type: ignore[call-overload]
array = np.array([1, 2, 3, 4], dtype=dtype)
for func in ['sin', 'sqrt', 'ceil']:
for func in ["sin", "sqrt", "ceil"]:
ufunc = getattr(np, func)
res_x = ufunc(x)
res_array = ufunc(array)
@ -408,14 +445,21 @@ class TestNumPyInterop(TestCase):
x = torch.tensor([1, 2, 3, 4]).type(tp) # type: ignore[call-overload]
array = np.array([1, 2, 3, 4], dtype=dtype)
geq2_x = np.greater_equal(x, 2)
geq2_array = np.greater_equal(array, 2).astype('uint8')
geq2_array = np.greater_equal(array, 2).astype("uint8")
self.assertIsInstance(geq2_x, torch.ByteTensor)
for i in range(len(x)):
self.assertEqual(geq2_x[i], geq2_array[i])
@onlyCPU
def test_multiplication_numpy_scalar(self, device) -> None:
for np_dtype in [np.float32, np.float64, np.int32, np.int64, np.int16, np.uint8]:
for np_dtype in [
np.float32,
np.float64,
np.int32,
np.int64,
np.int16,
np.uint8,
]:
for t_dtype in [torch.float, torch.double]:
# mypy raises an error when np.floatXY(2.0) is called
# even though this is valid code
@ -433,8 +477,11 @@ class TestNumPyInterop(TestCase):
@onlyCPU
def test_parse_numpy_int(self, device):
# Only concrete class can be given where "Type[number[_64Bit]]" is expected
self.assertRaisesRegex(RuntimeError, "(Overflow|an integer is required)",
lambda: torch.mean(torch.randn(1, 1), np.uint64(-1))) # type: ignore[call-overload]
self.assertRaisesRegex(
RuntimeError,
"(Overflow|an integer is required)",
lambda: torch.mean(torch.randn(1, 1), np.uint64(-1)),
) # type: ignore[call-overload]
# https://github.com/pytorch/pytorch/issues/29252
for nptype in [np.int16, np.int8, np.uint8, np.int32, np.int64]:
scalar = 3
@ -444,7 +491,10 @@ class TestNumPyInterop(TestCase):
# np integral type can be treated as a python int in native functions with
# int parameters:
self.assertEqual(torch.ones(5).diag(scalar), torch.ones(5).diag(np_val))
self.assertEqual(torch.ones([2, 2, 2, 2]).mean(scalar), torch.ones([2, 2, 2, 2]).mean(np_val))
self.assertEqual(
torch.ones([2, 2, 2, 2]).mean(scalar),
torch.ones([2, 2, 2, 2]).mean(np_val),
)
# numpy integral type parses like a python int in custom python bindings:
self.assertEqual(torch.Storage(np_val).size(), scalar) # type: ignore[attr-defined]
@ -461,25 +511,40 @@ class TestNumPyInterop(TestCase):
self.assertEqual((np_val + t).dtype, t.dtype)
def test_has_storage_numpy(self, device):
for dtype in [np.float32, np.float64, np.int64,
np.int32, np.int16, np.uint8]:
for dtype in [np.float32, np.float64, np.int64, np.int32, np.int16, np.uint8]:
arr = np.array([1], dtype=dtype)
self.assertIsNotNone(torch.tensor(arr, device=device, dtype=torch.float32).storage())
self.assertIsNotNone(torch.tensor(arr, device=device, dtype=torch.double).storage())
self.assertIsNotNone(torch.tensor(arr, device=device, dtype=torch.int).storage())
self.assertIsNotNone(torch.tensor(arr, device=device, dtype=torch.long).storage())
self.assertIsNotNone(torch.tensor(arr, device=device, dtype=torch.uint8).storage())
self.assertIsNotNone(
torch.tensor(arr, device=device, dtype=torch.float32).storage()
)
self.assertIsNotNone(
torch.tensor(arr, device=device, dtype=torch.double).storage()
)
self.assertIsNotNone(
torch.tensor(arr, device=device, dtype=torch.int).storage()
)
self.assertIsNotNone(
torch.tensor(arr, device=device, dtype=torch.long).storage()
)
self.assertIsNotNone(
torch.tensor(arr, device=device, dtype=torch.uint8).storage()
)
@dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool))
def test_numpy_scalar_cmp(self, device, dtype):
if dtype.is_complex:
tensors = (torch.tensor(complex(1, 3), dtype=dtype, device=device),
torch.tensor([complex(1, 3), 0, 2j], dtype=dtype, device=device),
torch.tensor([[complex(3, 1), 0], [-1j, 5]], dtype=dtype, device=device))
tensors = (
torch.tensor(complex(1, 3), dtype=dtype, device=device),
torch.tensor([complex(1, 3), 0, 2j], dtype=dtype, device=device),
torch.tensor(
[[complex(3, 1), 0], [-1j, 5]], dtype=dtype, device=device
),
)
else:
tensors = (torch.tensor(3, dtype=dtype, device=device),
torch.tensor([1, 0, -3], dtype=dtype, device=device),
torch.tensor([[3, 0, -1], [3, 5, 4]], dtype=dtype, device=device))
tensors = (
torch.tensor(3, dtype=dtype, device=device),
torch.tensor([1, 0, -3], dtype=dtype, device=device),
torch.tensor([[3, 0, -1], [3, 5, 4]], dtype=dtype, device=device),
)
for tensor in tensors:
if dtype == torch.bfloat16:
@ -488,10 +553,16 @@ class TestNumPyInterop(TestCase):
continue
np_array = tensor.cpu().numpy()
for t, a in product((tensor.flatten()[0], tensor.flatten()[0].item()),
(np_array.flatten()[0], np_array.flatten()[0].item())):
for t, a in product(
(tensor.flatten()[0], tensor.flatten()[0].item()),
(np_array.flatten()[0], np_array.flatten()[0].item()),
):
self.assertEqual(t, a)
if dtype == torch.complex64 and torch.is_tensor(t) and type(a) == np.complex64:
if (
dtype == torch.complex64
and torch.is_tensor(t)
and type(a) == np.complex64
):
# TODO: Imaginary part is dropped in this case. Need fix.
# https://github.com/pytorch/pytorch/issues/43579
self.assertFalse(t == a)
@ -553,10 +624,10 @@ class TestNumPyInterop(TestCase):
# Regression test for https://github.com/pytorch/pytorch/issues/115066
self.assertEqual(torch.mul(x, y).shape, y.shape)
# Regression test for https://github.com/pytorch/pytorch/issues/113037
self.assertEqual(torch.div(x, y, rounding_mode='floor').shape, y.shape)
self.assertEqual(torch.div(x, y, rounding_mode="floor").shape, y.shape)
instantiate_device_type_tests(TestNumPyInterop, globals())
if __name__ == '__main__':
if __name__ == "__main__":
run_tests()

test/test_openmp.py

@ -4,16 +4,16 @@ import collections
import unittest
import torch
from torch.testing._internal.common_utils import (
TestCase, run_tests, TEST_WITH_ASAN)
from torch.testing._internal.common_utils import run_tests, TEST_WITH_ASAN, TestCase
try:
import psutil
HAS_PSUTIL = True
except ImportError:
HAS_PSUTIL = False
device = torch.device('cpu')
device = torch.device("cpu")
class Network(torch.nn.Module):
@ -50,21 +50,21 @@ class TestOpenMP_ParallelFor(TestCase):
if idx == 0:
continue
is_increasing = is_increasing and (last_rss[idx] > last_rss[idx - 1])
self.assertTrue(not is_increasing,
msg=f'memory usage is increasing, {str(last_rss)}')
self.assertTrue(
not is_increasing, msg=f"memory usage is increasing, {str(last_rss)}"
)
def test_one_thread(self):
"""Make sure there is no memory leak with one thread: issue gh-32284
"""
"""Make sure there is no memory leak with one thread: issue gh-32284"""
torch.set_num_threads(1)
self.func_rss(300)
def test_n_threads(self):
"""Make sure there is no memory leak with many threads
"""
"""Make sure there is no memory leak with many threads"""
ncores = min(5, psutil.cpu_count(logical=False))
torch.set_num_threads(ncores)
self.func_rss(300)
if __name__ == '__main__':
if __name__ == "__main__":
run_tests()

test/test_optim.py

File diff suppressed because it is too large.